* [PATCH 0/3] ARC: cache: allow to autodetect L1 cache line size
@ 2019-04-16 17:10 ` Eugeniy Paltsev
  0 siblings, 0 replies; 12+ messages in thread
From: Eugeniy Paltsev @ 2019-04-16 17:10 UTC (permalink / raw)
  To: linux-snps-arc, Vineet Gupta
  Cc: linux-kernel, Alexey Brodkin, Eugeniy Paltsev

Allow autodetecting the L1 I/D cache line size at runtime instead of
relying on the value provided via Kconfig.

This patch series is inspired by https://gitsnps.internal.synopsys.com/
arc_oss/linux/commit/c229c6e2e7a62269e2929943ccda2b04149e5b27
but it has the following improvements:
 * we don't touch any generic code, so it is easier to upstream.
 * autodetection can be disabled via Kconfig.
 * the changes are rebased on the latest kernel version.
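
For illustration, the detection itself boils down to picking up the line
length that the hardware already reports via the cache BCRs (a rough sketch
only; the helper below is hypothetical and not part of this series, while
cpuinfo_arc700[] and the 32/64/128 sanity check mirror patch 3):

  /* sketch: take the L1 D$ line length reported by hardware at boot */
  static unsigned int l1_line_sz;

  static void __init detect_l1_line_sz(unsigned int cpu)
  {
          /* cpuinfo_arc700[] is filled from the cache build config registers */
          l1_line_sz = cpuinfo_arc700[cpu].dcache.line_len;

          if (l1_line_sz != 32 && l1_line_sz != 64 && l1_line_sz != 128)
                  panic("L1 cache line sz [%u] unsupported\n", l1_line_sz);
  }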

Eugeniy Paltsev (3):
  ARC: cache: declare cache-related defines via ARC_MAX_CACHE_SHIFT
  ARC: cache: check cache configuration on each CPU
  ARC: cache: allow to autodetect L1 cache line size

 arch/arc/Kconfig             |  10 +++
 arch/arc/include/asm/cache.h |  20 ++++--
 arch/arc/lib/memset-archs.S  |   8 ++-
 arch/arc/mm/cache.c          | 147 ++++++++++++++++++++++++++++++++-----------
 4 files changed, 141 insertions(+), 44 deletions(-)

-- 
2.14.5


* [PATCH 1/3] ARC: cache: declare cache-related defines via ARC_MAX_CACHE_SHIFT
  2019-04-16 17:10 ` Eugeniy Paltsev
@ 2019-04-16 17:10   ` Eugeniy Paltsev
  -1 siblings, 0 replies; 12+ messages in thread
From: Eugeniy Paltsev @ 2019-04-16 17:10 UTC (permalink / raw)
  To: linux-snps-arc, Vineet Gupta
  Cc: linux-kernel, Alexey Brodkin, Eugeniy Paltsev

 * Declare all cache-related defines that need the maximum cache
line size (like SMP_CACHE_BYTES, ARCH_DMA_MINALIGN, etc.) via
one common ARC_MAX_CACHE_SHIFT define.

 * Move all cache-related defines outside the '#ifdef __ASSEMBLY__'
guard to make them visible from assembly code.

This is a prerequisite for autodetecting the cache line size at runtime.
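
For callers that only need a safe alignment nothing changes; they keep using
the derived constants, which are now based on the maximum possible line size
(a hypothetical example, not part of this patch):

  /* hypothetical driver buffer: aligned to the largest possible line (128B),
   * so it is safe whatever line size the running hardware actually has */
  struct example_dma_buf {
          u8 payload[64];
  } __aligned(ARCH_DMA_MINALIGN);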

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
---
 arch/arc/include/asm/cache.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 2ad77fb43639..f1642634aab0 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -9,6 +9,12 @@
 #ifndef __ARC_ASM_CACHE_H
 #define __ARC_ASM_CACHE_H
 
+/* Largest line length for either L1 or L2 is 128 bytes (2^7) */
+#define ARC_MAX_CACHE_SHIFT	7
+#define ARC_MAX_CACHE_BYTES	(1 << (ARC_MAX_CACHE_SHIFT))
+#define SMP_CACHE_BYTES		ARC_MAX_CACHE_BYTES
+#define ARCH_DMA_MINALIGN	ARC_MAX_CACHE_BYTES
+
 /* In case $$ not config, setup a dummy number for rest of kernel */
 #ifndef CONFIG_ARC_CACHE_LINE_SHIFT
 #define L1_CACHE_SHIFT		6
@@ -47,10 +53,7 @@
 	: "r"(data), "r"(ptr));		\
 })
 
-/* Largest line length for either L1 or L2 is 128 bytes */
-#define SMP_CACHE_BYTES		128
-#define cache_line_size()	SMP_CACHE_BYTES
-#define ARCH_DMA_MINALIGN	SMP_CACHE_BYTES
+#define cache_line_size()	ARC_MAX_CACHE_BYTES
 
 /*
  * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses
-- 
2.14.5


* [PATCH 2/3] ARC: cache: check cache configuration on each CPU
  2019-04-16 17:10 ` Eugeniy Paltsev
@ 2019-04-16 17:10   ` Eugeniy Paltsev
  -1 siblings, 0 replies; 12+ messages in thread
From: Eugeniy Paltsev @ 2019-04-16 17:10 UTC (permalink / raw)
  To: linux-snps-arc, Vineet Gupta
  Cc: linux-kernel, Alexey Brodkin, Eugeniy Paltsev

The ARC kernel code assumes that all cores have the same cache config,
but as of today we check the cache configuration only on the master CPU.
Fix that and check the cache configuration on each CPU.

Also, while at it, split arc_cache_init_master() into two parts:
 * checks/setups related to the master CPU's L1 caches
 * the rest of the cache checks/setups which need to be done only once
   (like IOC / SLC / DMA callbacks setup)

Both of these changes are prerequisites for autodetecting the cache
line size at runtime.
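
Roughly, the resulting boot-time flow looks like this (condensed sketch of
the code below):

  void __ref arc_cache_init(void)
  {
          unsigned int cpu = smp_processor_id();

          if (!cpu)                        /* master CPU only */
                  arc_l1_cache_init_master(cpu);

          arc_l1_cache_check(cpu);         /* now runs on every CPU */

          if (!cpu)                        /* IOC / SLC / dma callbacks, once */
                  arc_cache_init_master();
  }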

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
---
 arch/arc/mm/cache.c | 66 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 22 deletions(-)

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 4135abec3fb0..1036bd56f518 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -1208,27 +1208,43 @@ noinline void __init arc_ioc_setup(void)
 	__dc_enable();
 }
 
+#if IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE)
+static void arc_l1_line_check(unsigned int line_len, const char *cache_name)
+{
+	if (!line_len)
+		panic("%s support enabled but non-existent cache\n",
+		      cache_name);
+
+	if (line_len != L1_CACHE_BYTES)
+		panic("%s line size [%u] != expected [%u]",
+		      cache_name, line_len, L1_CACHE_BYTES);
+}
+#endif /* IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE) */
+
 /*
- * Cache related boot time checks/setups only needed on master CPU:
- *  - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES)
- *    Assume SMP only, so all cores will have same cache config. A check on
- *    one core suffices for all
- *  - IOC setup / dma callbacks only need to be done once
+ * Cache related boot time checks needed on every CPU.
  */
-void __init arc_cache_init_master(void)
+static void arc_l1_cache_check(unsigned int cpu)
 {
-	unsigned int __maybe_unused cpu = smp_processor_id();
+	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE))
+		arc_l1_line_check(cpuinfo_arc700[cpu].icache.line_len, "ICache");
+
+	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE))
+		arc_l1_line_check(cpuinfo_arc700[cpu].dcache.line_len, "DCache");
+}
 
+/*
+ * L1 Cache related boot time checks/setups needed on master CPU:
+ * These checks/setups are done assuming that all CPUs have the same cache
+ * configuration (we validate this in arc_l1_cache_check()):
+ *  - Geometry checks
+ *  - L1 cache line loop callbacks
+ */
+void __init arc_l1_cache_init_master(unsigned int cpu)
+{
 	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
 		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 
-		if (!ic->line_len)
-			panic("cache support enabled but non-existent cache\n");
-
-		if (ic->line_len != L1_CACHE_BYTES)
-			panic("ICache line [%d] != kernel Config [%d]",
-			      ic->line_len, L1_CACHE_BYTES);
-
 		/*
 		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
 		 * pair to provide vaddr/paddr respectively, just as in MMU v3
@@ -1242,13 +1258,6 @@ void __init arc_cache_init_master(void)
 	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
 		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
 
-		if (!dc->line_len)
-			panic("cache support enabled but non-existent cache\n");
-
-		if (dc->line_len != L1_CACHE_BYTES)
-			panic("DCache line [%d] != kernel Config [%d]",
-			      dc->line_len, L1_CACHE_BYTES);
-
 		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
 		if (is_isa_arcompact()) {
 			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
@@ -1271,6 +1280,14 @@ void __init arc_cache_init_master(void)
 	 */
 	BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES,
 			 "SMP_CACHE_BYTES must be >= any cache line length");
+}
+
+/*
+ * Cache related boot time checks/setups needed on master CPU:
+ *  - IOC setup / SLC setup / dma callbacks only need to be done once
+ */
+void __init arc_cache_init_master(void)
+{
 	if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES))
 		panic("L2 Cache line [%d] > kernel Config [%d]\n",
 		      l2_line_sz, SMP_CACHE_BYTES);
@@ -1301,11 +1318,16 @@ void __init arc_cache_init_master(void)
 
 void __ref arc_cache_init(void)
 {
-	unsigned int __maybe_unused cpu = smp_processor_id();
+	unsigned int cpu = smp_processor_id();
 	char str[256];
 
 	pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
 
+	if (!cpu)
+		arc_l1_cache_init_master(cpu);
+
+	arc_l1_cache_check(cpu);
+
 	if (!cpu)
 		arc_cache_init_master();
 
-- 
2.14.5


* [PATCH 3/3] ARC: cache: allow to autodetect L1 cache line size
  2019-04-16 17:10 ` Eugeniy Paltsev
@ 2019-04-16 17:10   ` Eugeniy Paltsev
  -1 siblings, 0 replies; 12+ messages in thread
From: Eugeniy Paltsev @ 2019-04-16 17:10 UTC (permalink / raw)
  To: linux-snps-arc, Vineet Gupta
  Cc: linux-kernel, Alexey Brodkin, Eugeniy Paltsev

One step toward "build once, run anywhere".

Allow autodetecting the L1 I/D cache line size at runtime instead of
relying on the value provided via Kconfig.

This is controlled via the CONFIG_ARC_CACHE_LINE_AUTODETECT Kconfig
option, which is disabled by default.
 * With this option disabled there is no overhead compared with the
   current implementation.
 * With this option enabled there is some overhead in both speed
   and code size:
   - we use the cache line size stored in a global variable instead of
     a compile-time define, so the compiler can't apply some
     optimizations.
   - we align all cache-related buffers to the maximum possible cache
     line size. This isn't significant because we mostly use
     SMP_CACHE_BYTES or ARCH_DMA_MINALIGN to align buffers (and they
     are equal to the maximum possible cache line size).

The main change is splitting L1_CACHE_BYTES into two separate defines:
 * L1_CACHE_BYTES      >= real L1 I$/D$ line size.
   Available at compile time. Used for aligning data.
 * CACHEOPS_L1_LINE_SZ == real L1 I$/D$ line size.
   Available at run time. Used in operations on cache lines/regions.
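
In other words (condensed sketch of the code below), alignment keeps using
the compile-time maximum while the cache-op loops switch to the runtime value:

  /* compile time: only an upper bound, still fine for aligning buffers */
  #define L1_CACHE_BYTES        (1 << ARC_MAX_CACHE_SHIFT)    /* 128 with autodetect */

  /* run time: the real line length, used to walk cache lines */
  #if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
  #define CACHEOPS_L1_LINE_SZ   l1_line_sz
  #else
  #define CACHEOPS_L1_LINE_SZ   L1_CACHE_BYTES
  #endif

  num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
  while (num_lines-- > 0) {
          write_aux_reg(aux_cmd, paddr);
          paddr += CACHEOPS_L1_LINE_SZ;
  }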

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
---
 arch/arc/Kconfig             | 10 +++++
 arch/arc/include/asm/cache.h |  9 ++++-
 arch/arc/lib/memset-archs.S  |  8 +++-
 arch/arc/mm/cache.c          | 89 ++++++++++++++++++++++++++++++++++----------
 4 files changed, 94 insertions(+), 22 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c781e45d1d99..e7eb5ff1485d 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -215,10 +215,20 @@ menuconfig ARC_CACHE
 
 if ARC_CACHE
 
+config ARC_CACHE_LINE_AUTODETECT
+	bool "Detect cache line length automatically at runtime"
+	depends on ARC_HAS_ICACHE || ARC_HAS_DCACHE
+	help
+	  ARC has a configurable cache line length. Enable this option to detect
+	  the cache line length automatically at runtime so that the kernel image
+	  is runnable on HW with different cache line configurations.
+	  If you don't know what the above means, leave this setting alone.
+
 config ARC_CACHE_LINE_SHIFT
 	int "Cache Line Length (as power of 2)"
 	range 5 7
 	default "6"
+	depends on !ARC_CACHE_LINE_AUTODETECT
 	help
 	  Starting with ARC700 4.9, Cache line length is configurable,
 	  This option specifies "N", with Line-len = 2 power N
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index f1642634aab0..0ff8e19008e4 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -15,15 +15,22 @@
 #define SMP_CACHE_BYTES		ARC_MAX_CACHE_BYTES
 #define ARCH_DMA_MINALIGN	ARC_MAX_CACHE_BYTES
 
+#if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
+/*
+ * This must be used for aligning only. In case of cache line autodetect it is
+ * only safe to use maximum possible value here.
+ */
+#define L1_CACHE_SHIFT		ARC_MAX_CACHE_SHIFT
+#else
 /* In case $$ not config, setup a dummy number for rest of kernel */
 #ifndef CONFIG_ARC_CACHE_LINE_SHIFT
 #define L1_CACHE_SHIFT		6
 #else
 #define L1_CACHE_SHIFT		CONFIG_ARC_CACHE_LINE_SHIFT
 #endif
+#endif /* IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT) */
 
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
-#define CACHE_LINE_MASK		(~(L1_CACHE_BYTES - 1))
 
 /*
  * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF)
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index b3373f5c88e0..4baeeea29482 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -16,9 +16,15 @@
  * line lengths (32B and 128B) you should rewrite code carefully checking
  * we don't call any prefetchw/prealloc instruction for L1 cache lines which
  * don't belongs to memset area.
+ *
+ * TODO: FIXME: as of today we fall back to the non-optimized memset
+ * implementation when the ARC_CACHE_LINE_AUTODETECT option is enabled (as we
+ * don't know the L1 cache line size at compile time).
+ * One possible way to fix this is to declare memset as a function pointer and
+ * update it when we discover actual cache line size.
  */
 
-#if L1_CACHE_SHIFT == 6
+#if (!IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)) && (L1_CACHE_SHIFT == 6)
 
 .macro PREALLOC_INSTR	reg, off
 	prealloc	[\reg, \off]
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 1036bd56f518..8d006c1d12a1 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -25,6 +25,22 @@
 #define USE_RGN_FLSH	1
 #endif
 
+/*
+ * Cache line defines and real L1 I$/D$ line size relations:
+ *
+ * L1_CACHE_BYTES      >= real L1 I$/D$ line size. Available at compile time.
+ * CACHEOPS_L1_LINE_SZ == real L1 I$/D$ line size. Available at run time.
+ */
+#if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
+#define CACHEOPS_L1_LINE_SZ		l1_line_sz
+#define CACHEOPS_L1_LINE_MASK		l1_line_mask
+#else
+#define CACHEOPS_L1_LINE_SZ		L1_CACHE_BYTES
+#define CACHEOPS_L1_LINE_MASK		(~((CACHEOPS_L1_LINE_SZ) - 1))
+#endif /* IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT) */
+
+static unsigned int l1_line_sz;
+static unsigned long l1_line_mask;
 static int l2_line_sz;
 static int ioc_exists;
 int slc_enable = 1, ioc_enable = 1;
@@ -256,19 +272,19 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
 	if (!full_page) {
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
-		vaddr &= CACHE_LINE_MASK;
+		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
+		paddr &= CACHEOPS_L1_LINE_MASK;
+		vaddr &= CACHEOPS_L1_LINE_MASK;
 	}
 
-	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
 
 	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
 	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
 
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
-		paddr += L1_CACHE_BYTES;
+		paddr += CACHEOPS_L1_LINE_SZ;
 	}
 }
 
@@ -302,11 +318,11 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
 	if (!full_page) {
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
-		vaddr &= CACHE_LINE_MASK;
+		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
+		paddr &= CACHEOPS_L1_LINE_MASK;
+		vaddr &= CACHEOPS_L1_LINE_MASK;
 	}
-	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
 
 	/*
 	 * MMUv3, cache ops require paddr in PTAG reg
@@ -328,11 +344,11 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	while (num_lines-- > 0) {
 		if (!full_page) {
 			write_aux_reg(aux_tag, paddr);
-			paddr += L1_CACHE_BYTES;
+			paddr += CACHEOPS_L1_LINE_SZ;
 		}
 
 		write_aux_reg(aux_cmd, vaddr);
-		vaddr += L1_CACHE_BYTES;
+		vaddr += CACHEOPS_L1_LINE_SZ;
 	}
 }
 
@@ -372,11 +388,11 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
 	if (!full_page) {
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
+		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
+		paddr &= CACHEOPS_L1_LINE_MASK;
 	}
 
-	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
 
 	/*
 	 * For HS38 PAE40 configuration
@@ -396,7 +412,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
-		paddr += L1_CACHE_BYTES;
+		paddr += CACHEOPS_L1_LINE_SZ;
 	}
 }
 
@@ -422,14 +438,14 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 	if (!full_page) {
 		/* for any leading gap between @paddr and start of cache line */
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
+		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
+		paddr &= CACHEOPS_L1_LINE_MASK;
 
 		/*
 		 *  account for any trailing gap to end of cache line
 		 *  this is equivalent to DIV_ROUND_UP() in line ops above
 		 */
-		sz += L1_CACHE_BYTES - 1;
+		sz += CACHEOPS_L1_LINE_SZ - 1;
 	}
 
 	if (is_pae40_enabled()) {
@@ -1215,14 +1231,21 @@ static void arc_l1_line_check(unsigned int line_len, const char *cache_name)
 		panic("%s support enabled but non-existent cache\n",
 		      cache_name);
 
-	if (line_len != L1_CACHE_BYTES)
+	/*
+	 * With CONFIG_ARC_CACHE_LINE_AUTODETECT disabled we check that the
+	 * cache line size matches the one provided via Kconfig; with
+	 * CONFIG_ARC_CACHE_LINE_AUTODETECT enabled we check that the cache
+	 * line size is the same for every L1 (I/D) cache on every CPU.
+	 */
+	if (line_len != CACHEOPS_L1_LINE_SZ)
 		panic("%s line size [%u] != expected [%u]",
-		      cache_name, line_len, L1_CACHE_BYTES);
+		      cache_name, line_len, CACHEOPS_L1_LINE_SZ);
 }
 #endif /* IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE) */
 
 /*
  * Cache related boot time checks needed on every CPU.
+ * NOTE: This function expects 'l1_line_sz' to be set.
  */
 static void arc_l1_cache_check(unsigned int cpu)
 {
@@ -1239,12 +1262,28 @@ static void arc_l1_cache_check(unsigned int cpu)
  * configuration (we validate this in arc_l1_cache_check()):
  *  - Geometry checks
  *  - L1 cache line loop callbacks
+ *  - l1_line_sz / l1_line_mask setup
  */
 void __init arc_l1_cache_init_master(unsigned int cpu)
 {
+	/*
+	 * 'l1_line_sz' programming model:
+	 * We keep the programming of 'l1_line_sz' simple as we assume we don't
+	 * support the case where CPUs have different cache configurations.
+	 * 1. Assign to 'l1_line_sz' the length of any (I/D) L1 cache line of
+	 *    the master CPU.
+	 * 2. Validate the 'l1_line_sz' value itself.
+	 * 3. Check that both L1 I$/D$ line sizes on each CPU are equal to
+	 *    'l1_line_sz' (or to the value provided via Kconfig in case
+	 *     CONFIG_ARC_CACHE_LINE_AUTODETECT is disabled). This is done in
+	 *     arc_l1_cache_check(), which is called for each CPU.
+	 */
+
 	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
 		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 
+		l1_line_sz = ic->line_len;
+
 		/*
 		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
 		 * pair to provide vaddr/paddr respectively, just as in MMU v3
@@ -1258,6 +1297,8 @@ void __init arc_l1_cache_init_master(unsigned int cpu)
 	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
 		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
 
+		l1_line_sz = dc->line_len;
+
 		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
 		if (is_isa_arcompact()) {
 			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
@@ -1274,6 +1315,14 @@ void __init arc_l1_cache_init_master(unsigned int cpu)
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE) ||
+	    IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
+		if (l1_line_sz != 32 && l1_line_sz != 64 && l1_line_sz != 128)
+			panic("L1 cache line sz [%u] unsupported\n", l1_line_sz);
+
+		l1_line_mask = ~(l1_line_sz - 1);
+	}
+
 	/*
 	 * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger
 	 * or equal to any cache line length.
-- 
2.14.5


* Re: [PATCH 2/3] ARC: cache: check cache configuration on each CPU
  2019-04-16 17:10   ` Eugeniy Paltsev
@ 2019-04-16 17:32     ` Vineet Gupta
  -1 siblings, 0 replies; 12+ messages in thread
From: Vineet Gupta @ 2019-04-16 17:32 UTC (permalink / raw)
  To: Eugeniy Paltsev, linux-snps-arc; +Cc: linux-kernel, Alexey Brodkin

On 4/16/19 10:10 AM, Eugeniy Paltsev wrote:
> The ARC kernel code assumes that all cores have the same cache config,
> but as of today we check the cache configuration only on the master CPU.
> Fix that and check the cache configuration on each CPU.

What is broken? With current hardware it is impossible to have an SMP config with
different cache geometry; the line size can definitely not be different.
This is adding needless cycles for no apparent benefit.

>
> Also, while at it, split arc_cache_init_master() into two parts:
>  * checks/setups related to the master CPU's L1 caches
>  * the rest of the cache checks/setups which need to be done only once
>    (like IOC / SLC / DMA callbacks setup)
>
> Both of these changes are prerequisites for autodetecting the cache
> line size at runtime.
>
> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
> ---
>  arch/arc/mm/cache.c | 66 +++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 44 insertions(+), 22 deletions(-)
>
> diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
> index 4135abec3fb0..1036bd56f518 100644
> --- a/arch/arc/mm/cache.c
> +++ b/arch/arc/mm/cache.c
> @@ -1208,27 +1208,43 @@ noinline void __init arc_ioc_setup(void)
>  	__dc_enable();
>  }
>  
> +#if IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE)
> +static void arc_l1_line_check(unsigned int line_len, const char *cache_name)
> +{
> +	if (!line_len)
> +		panic("%s support enabled but non-existent cache\n",
> +		      cache_name);
> +
> +	if (line_len != L1_CACHE_BYTES)
> +		panic("%s line size [%u] != expected [%u]",
> +		      cache_name, line_len, L1_CACHE_BYTES);
> +}
> +#endif /* IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE) */
> +
>  /*
> - * Cache related boot time checks/setups only needed on master CPU:
> - *  - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES)
> - *    Assume SMP only, so all cores will have same cache config. A check on
> - *    one core suffices for all
> - *  - IOC setup / dma callbacks only need to be done once
> + * Cache related boot time checks needed on every CPU.
>   */
> -void __init arc_cache_init_master(void)
> +static void arc_l1_cache_check(unsigned int cpu)
>  {
> -	unsigned int __maybe_unused cpu = smp_processor_id();
> +	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE))
> +		arc_l1_line_check(cpuinfo_arc700[cpu].icache.line_len, "ICache");
> +
> +	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE))
> +		arc_l1_line_check(cpuinfo_arc700[cpu].dcache.line_len, "DCache");
> +}
>  
> +/*
> + * L1 Cache related boot time checks/setups needed on master CPU:
> + * This checks/setups are done in assumption that all CPU have same cache
> + * configuration (we validate this in arc_cache_check()):
> + *  - Geometry checks
> + *  - L1 cache line loop callbacks
> + */
> +void __init arc_l1_cache_init_master(unsigned int cpu)
> +{
>  	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
>  		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
>  
> -		if (!ic->line_len)
> -			panic("cache support enabled but non-existent cache\n");
> -
> -		if (ic->line_len != L1_CACHE_BYTES)
> -			panic("ICache line [%d] != kernel Config [%d]",
> -			      ic->line_len, L1_CACHE_BYTES);
> -
>  		/*
>  		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
>  		 * pair to provide vaddr/paddr respectively, just as in MMU v3
> @@ -1242,13 +1258,6 @@ void __init arc_cache_init_master(void)
>  	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
>  		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
>  
> -		if (!dc->line_len)
> -			panic("cache support enabled but non-existent cache\n");
> -
> -		if (dc->line_len != L1_CACHE_BYTES)
> -			panic("DCache line [%d] != kernel Config [%d]",
> -			      dc->line_len, L1_CACHE_BYTES);
> -
>  		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
>  		if (is_isa_arcompact()) {
>  			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
> @@ -1271,6 +1280,14 @@ void __init arc_cache_init_master(void)
>  	 */
>  	BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES,
>  			 "SMP_CACHE_BYTES must be >= any cache line length");
> +}
> +
> +/*
> + * Cache related boot time checks/setups needed on master CPU:
> + *  - IOC setup / SLC setup / dma callbacks only need to be done once
> + */
> +void __init arc_cache_init_master(void)
> +{
>  	if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES))
>  		panic("L2 Cache line [%d] > kernel Config [%d]\n",
>  		      l2_line_sz, SMP_CACHE_BYTES);
> @@ -1301,11 +1318,16 @@ void __init arc_cache_init_master(void)
>  
>  void __ref arc_cache_init(void)
>  {
> -	unsigned int __maybe_unused cpu = smp_processor_id();
> +	unsigned int cpu = smp_processor_id();
>  	char str[256];
>  
>  	pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
>  
> +	if (!cpu)
> +		arc_l1_cache_init_master(cpu);
> +
> +	arc_l1_cache_check(cpu);
> +
>  	if (!cpu)
>  		arc_cache_init_master();
>  


* Re: [PATCH 3/3] ARC: cache: allow to autodetect L1 cache line size
  2019-04-16 17:10   ` Eugeniy Paltsev
@ 2019-04-16 17:51     ` Vineet Gupta
  -1 siblings, 0 replies; 12+ messages in thread
From: Vineet Gupta @ 2019-04-16 17:51 UTC (permalink / raw)
  To: Eugeniy Paltsev, linux-snps-arc; +Cc: linux-kernel, Alexey Brodkin

On 4/16/19 10:10 AM, Eugeniy Paltsev wrote:
> One step toward "build once, run anywhere".

This is what I'm afraid is going to happen: we will slowly sneak this into
defconfigs and it will become the default. The occasional need for verification
doesn't justify maintaining this complexity. Not all hacks need to
be upstream.

> Allow autodetecting the L1 I/D cache line size at runtime instead of
> relying on the value provided via Kconfig.
>
> This is controlled via the CONFIG_ARC_CACHE_LINE_AUTODETECT Kconfig
> option, which is disabled by default.
>  * With this option disabled there is no overhead compared with the
>    current implementation.
>  * With this option enabled there is some overhead in both speed
>    and code size:
>    - we use the cache line size stored in a global variable instead of
>      a compile-time define, so the compiler can't apply some
>      optimizations.
>    - we align all cache-related buffers to the maximum possible cache
>      line size. This isn't significant because we mostly use
>      SMP_CACHE_BYTES or ARCH_DMA_MINALIGN to align buffers (and they
>      are equal to the maximum possible cache line size).
>
> The main change is splitting L1_CACHE_BYTES into two separate defines:
>  * L1_CACHE_BYTES      >= real L1 I$/D$ line size.
>    Available at compile time. Used for aligning data.
>  * CACHEOPS_L1_LINE_SZ == real L1 I$/D$ line size.
>    Available at run time. Used in operations on cache lines/regions.
>
> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
> ---
>  arch/arc/Kconfig             | 10 +++++
>  arch/arc/include/asm/cache.h |  9 ++++-
>  arch/arc/lib/memset-archs.S  |  8 +++-
>  arch/arc/mm/cache.c          | 89 ++++++++++++++++++++++++++++++++++----------
>  4 files changed, 94 insertions(+), 22 deletions(-)
>
> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
> index c781e45d1d99..e7eb5ff1485d 100644
> --- a/arch/arc/Kconfig
> +++ b/arch/arc/Kconfig
> @@ -215,10 +215,20 @@ menuconfig ARC_CACHE
>  
>  if ARC_CACHE
>  
> +config ARC_CACHE_LINE_AUTODETECT
> +	bool "Detect cache line length automatically at runtime"
> +	depends on ARC_HAS_ICACHE || ARC_HAS_DCACHE
> +	help
> +	  ARC has a configurable cache line length. Enable this option to detect
> +	  the cache line length automatically at runtime so that the kernel image
> +	  is runnable on HW with different cache line configurations.
> +	  If you don't know what the above means, leave this setting alone.
> +
>  config ARC_CACHE_LINE_SHIFT
>  	int "Cache Line Length (as power of 2)"
>  	range 5 7
>  	default "6"
> +	depends on !ARC_CACHE_LINE_AUTODETECT
>  	help
>  	  Starting with ARC700 4.9, Cache line length is configurable,
>  	  This option specifies "N", with Line-len = 2 power N
> diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
> index f1642634aab0..0ff8e19008e4 100644
> --- a/arch/arc/include/asm/cache.h
> +++ b/arch/arc/include/asm/cache.h
> @@ -15,15 +15,22 @@
>  #define SMP_CACHE_BYTES		ARC_MAX_CACHE_BYTES
>  #define ARCH_DMA_MINALIGN	ARC_MAX_CACHE_BYTES
>  
> +#if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
> +/*
> + * This must be used for aligning only. In case of cache line autodetect it is
> + * only safe to use maximum possible value here.
> + */
> +#define L1_CACHE_SHIFT		ARC_MAX_CACHE_SHIFT
> +#else
>  /* In case $$ not config, setup a dummy number for rest of kernel */
>  #ifndef CONFIG_ARC_CACHE_LINE_SHIFT
>  #define L1_CACHE_SHIFT		6
>  #else
>  #define L1_CACHE_SHIFT		CONFIG_ARC_CACHE_LINE_SHIFT
>  #endif
> +#endif /* IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT) */
>  
>  #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
> -#define CACHE_LINE_MASK		(~(L1_CACHE_BYTES - 1))
>  
>  /*
>   * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF)
> diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
> index b3373f5c88e0..4baeeea29482 100644
> --- a/arch/arc/lib/memset-archs.S
> +++ b/arch/arc/lib/memset-archs.S
> @@ -16,9 +16,15 @@
>   * line lengths (32B and 128B) you should rewrite code carefully checking
>   * we don't call any prefetchw/prealloc instruction for L1 cache lines which
>   * don't belongs to memset area.
> + *
> + * TODO: FIXME: as of today we fall back to the non-optimized memset
> + * implementation when the ARC_CACHE_LINE_AUTODETECT option is enabled (as we
> + * don't know the L1 cache line size at compile time).
> + * One possible way to fix this is to declare memset as a function pointer and
> + * update it when we discover actual cache line size.
>   */
>  
> -#if L1_CACHE_SHIFT == 6
> +#if (!IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)) && (L1_CACHE_SHIFT == 6)
>  
>  .macro PREALLOC_INSTR	reg, off
>  	prealloc	[\reg, \off]

You really want verification testing to hit these instructions....

> diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
> index 1036bd56f518..8d006c1d12a1 100644
> --- a/arch/arc/mm/cache.c
> +++ b/arch/arc/mm/cache.c
> @@ -25,6 +25,22 @@
>  #define USE_RGN_FLSH	1
>  #endif
>  
> +/*
> + * Cache line defines and real L1 I$/D$ line size relations:
> + *
> + * L1_CACHE_BYTES      >= real L1 I$/D$ line size. Available at compile time.

Call it the "max possible line size" compile-time constant

> + * CACHEOPS_L1_LINE_SZ == real L1 I$/D$ line size. Available at run time.
> + */
> +#if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
> +#define CACHEOPS_L1_LINE_SZ		l1_line_sz
> +#define CACHEOPS_L1_LINE_MASK		l1_line_mask
> +#else
> +#define CACHEOPS_L1_LINE_SZ		L1_CACHE_BYTES
> +#define CACHEOPS_L1_LINE_MASK		(~((CACHEOPS_L1_LINE_SZ) - 1))
> +#endif /* IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT) */
> +
> +static unsigned int l1_line_sz;
> +static unsigned long l1_line_mask;
>  static int l2_line_sz;
>  static int ioc_exists;
>  int slc_enable = 1, ioc_enable = 1;
> @@ -256,19 +272,19 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
>  	 *  -@sz will be integral multiple of line size (being page sized).
>  	 */
>  	if (!full_page) {
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> -		vaddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
> +		vaddr &= CACHEOPS_L1_LINE_MASK;
>  	}
>  
> -	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
> +	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
>  
>  	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
>  	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
>  
>  	while (num_lines-- > 0) {
>  		write_aux_reg(aux_cmd, paddr);
> -		paddr += L1_CACHE_BYTES;
> +		paddr += CACHEOPS_L1_LINE_SZ;
>  	}
>  }
>  
> @@ -302,11 +318,11 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
>  	 *  -@sz will be integral multiple of line size (being page sized).
>  	 */
>  	if (!full_page) {
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> -		vaddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
> +		vaddr &= CACHEOPS_L1_LINE_MASK;
>  	}
> -	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
> +	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
>  
>  	/*
>  	 * MMUv3, cache ops require paddr in PTAG reg
> @@ -328,11 +344,11 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
>  	while (num_lines-- > 0) {
>  		if (!full_page) {
>  			write_aux_reg(aux_tag, paddr);
> -			paddr += L1_CACHE_BYTES;
> +			paddr += CACHEOPS_L1_LINE_SZ;
>  		}
>  
>  		write_aux_reg(aux_cmd, vaddr);
> -		vaddr += L1_CACHE_BYTES;
> +		vaddr += CACHEOPS_L1_LINE_SZ;
>  	}
>  }
>  
> @@ -372,11 +388,11 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
>  	 *  -@sz will be integral multiple of line size (being page sized).
>  	 */
>  	if (!full_page) {
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
>  	}
>  
> -	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
> +	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
>  
>  	/*
>  	 * For HS38 PAE40 configuration
> @@ -396,7 +412,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
>  
>  	while (num_lines-- > 0) {
>  		write_aux_reg(aux_cmd, paddr);
> -		paddr += L1_CACHE_BYTES;
> +		paddr += CACHEOPS_L1_LINE_SZ;
>  	}
>  }
>  
> @@ -422,14 +438,14 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
>  
>  	if (!full_page) {
>  		/* for any leading gap between @paddr and start of cache line */
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
>  
>  		/*
>  		 *  account for any trailing gap to end of cache line
>  		 *  this is equivalent to DIV_ROUND_UP() in line ops above
>  		 */
> -		sz += L1_CACHE_BYTES - 1;
> +		sz += CACHEOPS_L1_LINE_SZ - 1;
>  	}
>  
>  	if (is_pae40_enabled()) {
> @@ -1215,14 +1231,21 @@ static void arc_l1_line_check(unsigned int line_len, const char *cache_name)
>  		panic("%s support enabled but non-existent cache\n",
>  		      cache_name);
>  
> -	if (line_len != L1_CACHE_BYTES)
> +	/*
> +	 * With CONFIG_ARC_CACHE_LINE_AUTODETECT disabled we check that the
> +	 * cache line size equals the value provided via Kconfig; with it
> +	 * enabled we check that the cache line size is the same for every
> +	 * L1 (I/D) cache on every CPU.
> +	 */
> +	if (line_len != CACHEOPS_L1_LINE_SZ)
>  		panic("%s line size [%u] != expected [%u]",
> -		      cache_name, line_len, L1_CACHE_BYTES);
> +		      cache_name, line_len, CACHEOPS_L1_LINE_SZ);
>  }
>  #endif /* IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE) */
>  
>  /*
>   * Cache related boot time checks needed on every CPU.
> + * NOTE: This function expects 'l1_line_sz' to be set.
>   */
>  static void arc_l1_cache_check(unsigned int cpu)
>  {
> @@ -1239,12 +1262,28 @@ static void arc_l1_cache_check(unsigned int cpu)
>   * configuration (we validate this in arc_cache_check()):
>   *  - Geometry checks
>   *  - L1 cache line loop callbacks
> + *  - l1_line_sz / l1_line_mask setup
>   */
>  void __init arc_l1_cache_init_master(unsigned int cpu)
>  {
> +	/*
> +	 * 'l1_line_sz' programming model:
> +	 * We simplify programming of 'l1_line_sz' by assuming that CPUs with
> +	 * different cache configurations are not supported.
> +	 * 1. Assign to 'l1_line_sz' the line length of either (I/D) L1 cache
> +	 *    of the master CPU.
> +	 * 2. Validate 'l1_line_sz' itself.
> +	 * 3. Check that both L1 I$/D$ line lengths on each CPU are equal to
> +	 *    'l1_line_sz' (or to the value provided via Kconfig when
> +	 *    CONFIG_ARC_CACHE_LINE_AUTODETECT is disabled). This is done in
> +	 *    arc_cache_check(), which is called for each CPU.
> +	 */
> +
>  	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
>  		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
>  
> +		l1_line_sz = ic->line_len;
> +
>  		/*
>  		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
>  		 * pair to provide vaddr/paddr respectively, just as in MMU v3
> @@ -1258,6 +1297,8 @@ void __init arc_l1_cache_init_master(unsigned int cpu)
>  	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
>  		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
>  
> +		l1_line_sz = dc->line_len;
> +
>  		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
>  		if (is_isa_arcompact()) {
>  			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
> @@ -1274,6 +1315,14 @@ void __init arc_l1_cache_init_master(unsigned int cpu)
>  		}
>  	}
>  
> +	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE) ||
> +	    IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
> +		if (l1_line_sz != 32 && l1_line_sz != 64 && l1_line_sz != 128)
> +			panic("L1 cache line sz [%u] unsupported\n", l1_line_sz);
> +
> +		l1_line_mask = ~(l1_line_sz - 1);
> +	}
> +
>  	/*
>  	 * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger
>  	 * or equal to any cache line length.
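
For reference, the leading-gap/rounding arithmetic that the loops above do
with CACHEOPS_L1_LINE_MASK and DIV_ROUND_UP can be checked in isolation
(a sketch assuming a 64-byte line; arc_count_lines is a hypothetical helper,
not kernel code):

#include <stdio.h>

/* mirrors the rounding done in __cache_line_loop_*() for the !full_page case */
static unsigned long arc_count_lines(unsigned long paddr, unsigned long sz,
				     unsigned long line_sz)
{
	unsigned long mask = ~(line_sz - 1);

	sz += paddr & ~mask;			/* add the leading gap        */
	paddr &= mask;				/* round start down to a line */
	return (sz + line_sz - 1) / line_sz;	/* DIV_ROUND_UP(sz, line_sz)  */
}

int main(void)
{
	/* 100 bytes starting 8 bytes into a 64-byte line touch 2 lines */
	printf("%lu\n", arc_count_lines(0x80001008UL, 100, 64));
	return 0;
}

The same arithmetic applies whether line_sz comes from Kconfig or from the
runtime variable; only the source of the value changes.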


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/3] ARC: cache: allow to autodetect L1 cache line size
@ 2019-04-16 17:51     ` Vineet Gupta
  0 siblings, 0 replies; 12+ messages in thread
From: Vineet Gupta @ 2019-04-16 17:51 UTC (permalink / raw)
  To: linux-snps-arc

On 4/16/19 10:10 AM, Eugeniy Paltsev wrote:
> One step to "build once, run anywhere"

This is what I'm afraid is going to happen: we will slowly sneak this into
defconfigs and it will become the default. An occasional need for verification
doesn't necessarily justify maintaining this complexity. Not all hacks need to
be upstream.

> Allow autodetecting the L1 I/D cache line size at runtime instead of
> relying on the value provided via Kconfig.
>
> This is controlled via the CONFIG_ARC_CACHE_LINE_AUTODETECT Kconfig option,
> which is disabled by default.
>  * With this option disabled there is no overhead compared with the
>    current implementation.
>  * With this option enabled there is some overhead in both speed
>    and code size:
>    - we use the cache line size stored in a global variable instead of a
>      compile-time define, so the compiler can't apply some
>      optimizations.
>    - we align all cache-related buffers to the maximum possible cache line
>      size. This isn't significant because we mostly use
>      SMP_CACHE_BYTES or ARCH_DMA_MINALIGN for alignment (and they
>      already equal the maximum possible cache line size).
>
> The main change is the split of L1_CACHE_BYTES into two separate defines:
>  * L1_CACHE_BYTES      >= real L1 I$/D$ line size.
>    Available at compile time. Used for aligning data.
>  * CACHEOPS_L1_LINE_SZ == real L1 I$/D$ line size.
>    Available at run time. Used in operations on cache lines/regions.
>
> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev at synopsys.com>
> ---
>  arch/arc/Kconfig             | 10 +++++
>  arch/arc/include/asm/cache.h |  9 ++++-
>  arch/arc/lib/memset-archs.S  |  8 +++-
>  arch/arc/mm/cache.c          | 89 ++++++++++++++++++++++++++++++++++----------
>  4 files changed, 94 insertions(+), 22 deletions(-)
>
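
The split between the two defines described above can be modelled in isolation
roughly as follows (a sketch only; the 128-byte maximum, the detected 64-byte
value and dma_buf are assumptions for illustration):

#include <stdio.h>
#include <stdint.h>

#define ARC_MAX_CACHE_SHIFT	7				/* worst case: 128-byte lines */
#define L1_CACHE_BYTES		(1 << ARC_MAX_CACHE_SHIFT)	/* compile time, align only   */

static unsigned int l1_line_sz;		/* real line size, filled in at boot */

/* aligning to the worst case keeps the buffer safe for any detected line size */
static char dma_buf[256] __attribute__((aligned(L1_CACHE_BYTES)));

int main(void)
{
	l1_line_sz = 64;	/* pretend autodetection reported 64-byte lines */

	printf("offset within a max-size line: %lu\n",
	       (unsigned long)((uintptr_t)dma_buf % L1_CACHE_BYTES));
	printf("cache ops would use a %u-byte stride\n", l1_line_sz);
	return 0;
}

Aligning to the worst case costs at most some padding, while cache operations
still step by the real line size, which is the trade-off the commit message
describes.
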
> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
> index c781e45d1d99..e7eb5ff1485d 100644
> --- a/arch/arc/Kconfig
> +++ b/arch/arc/Kconfig
> @@ -215,10 +215,20 @@ menuconfig ARC_CACHE
>  
>  if ARC_CACHE
>  
> +config ARC_CACHE_LINE_AUTODETECT
> +	bool "Detect cache line length automatically at runtime"
> +	depends on ARC_HAS_ICACHE || ARC_HAS_DCACHE
> +	help
> +	  ARC has a configurable cache line length. Enable this option to
> +	  detect the cache line length automatically at runtime, so that the
> +	  kernel image can run on hardware with different cache line
> +	  configurations. If you don't know what the above means, leave this
> +	  setting alone.
> +
>  config ARC_CACHE_LINE_SHIFT
>  	int "Cache Line Length (as power of 2)"
>  	range 5 7
>  	default "6"
> +	depends on !ARC_CACHE_LINE_AUTODETECT
>  	help
>  	  Starting with ARC700 4.9, Cache line length is configurable,
>  	  This option specifies "N", with Line-len = 2 power N
> diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
> index f1642634aab0..0ff8e19008e4 100644
> --- a/arch/arc/include/asm/cache.h
> +++ b/arch/arc/include/asm/cache.h
> @@ -15,15 +15,22 @@
>  #define SMP_CACHE_BYTES		ARC_MAX_CACHE_BYTES
>  #define ARCH_DMA_MINALIGN	ARC_MAX_CACHE_BYTES
>  
> +#if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
> +/*
> + * This must be used for alignment only. With cache line autodetection it is
> + * only safe to use the maximum possible value here.
> + */
> +#define L1_CACHE_SHIFT		ARC_MAX_CACHE_SHIFT
> +#else
>  /* In case $$ not config, setup a dummy number for rest of kernel */
>  #ifndef CONFIG_ARC_CACHE_LINE_SHIFT
>  #define L1_CACHE_SHIFT		6
>  #else
>  #define L1_CACHE_SHIFT		CONFIG_ARC_CACHE_LINE_SHIFT
>  #endif
> +#endif /* IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT) */
>  
>  #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
> -#define CACHE_LINE_MASK		(~(L1_CACHE_BYTES - 1))
>  
>  /*
>   * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF)
> diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
> index b3373f5c88e0..4baeeea29482 100644
> --- a/arch/arc/lib/memset-archs.S
> +++ b/arch/arc/lib/memset-archs.S
> @@ -16,9 +16,15 @@
>   * line lengths (32B and 128B) you should rewrite code carefully checking
>   * we don't call any prefetchw/prealloc instruction for L1 cache lines which
>   * don't belong to the memset area.
> + *
> + * TODO: FIXME: as of today we choose the non-optimized memset implementation
> + * when the ARC_CACHE_LINE_AUTODETECT option is enabled (the L1 cache line
> + * size is not known at compile time).
> + * One possible way to fix this is to declare memset as a function pointer and
> + * update it once the actual cache line size is discovered.
>   */
>  
> -#if L1_CACHE_SHIFT == 6
> +#if (!IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)) && (L1_CACHE_SHIFT == 6)
>  
>  .macro PREALLOC_INSTR	reg, off
>  	prealloc	[\reg, \off]

You really want verification testing to hit these instructions....

> diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
> index 1036bd56f518..8d006c1d12a1 100644
> --- a/arch/arc/mm/cache.c
> +++ b/arch/arc/mm/cache.c
> @@ -25,6 +25,22 @@
>  #define USE_RGN_FLSH	1
>  #endif
>  
> +/*
> + * Cache line defines and real L1 I$/D$ line size relations:
> + *
> + * L1_CACHE_BYTES      >= real L1 I$/D$ line size. Available at compile time.

Call it "Max possible line size" compile time constant

> + * CACHEOPS_L1_LINE_SZ == real L1 I$/D$ line size. Available at run time.
> + */
> +#if IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT)
> +#define CACHEOPS_L1_LINE_SZ		l1_line_sz
> +#define CACHEOPS_L1_LINE_MASK		l1_line_mask
> +#else
> +#define CACHEOPS_L1_LINE_SZ		L1_CACHE_BYTES
> +#define CACHEOPS_L1_LINE_MASK		(~((CACHEOPS_L1_LINE_SZ) - 1))
> +#endif /* IS_ENABLED(CONFIG_ARC_CACHE_LINE_AUTODETECT) */
> +
> +static unsigned int l1_line_sz;
> +static unsigned long l1_line_mask;
>  static int l2_line_sz;
>  static int ioc_exists;
>  int slc_enable = 1, ioc_enable = 1;
> @@ -256,19 +272,19 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
>  	 *  -@sz will be integral multiple of line size (being page sized).
>  	 */
>  	if (!full_page) {
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> -		vaddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
> +		vaddr &= CACHEOPS_L1_LINE_MASK;
>  	}
>  
> -	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
> +	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
>  
>  	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
>  	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
>  
>  	while (num_lines-- > 0) {
>  		write_aux_reg(aux_cmd, paddr);
> -		paddr += L1_CACHE_BYTES;
> +		paddr += CACHEOPS_L1_LINE_SZ;
>  	}
>  }
>  
> @@ -302,11 +318,11 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
>  	 *  -@sz will be integral multiple of line size (being page sized).
>  	 */
>  	if (!full_page) {
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> -		vaddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
> +		vaddr &= CACHEOPS_L1_LINE_MASK;
>  	}
> -	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
> +	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
>  
>  	/*
>  	 * MMUv3, cache ops require paddr in PTAG reg
> @@ -328,11 +344,11 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
>  	while (num_lines-- > 0) {
>  		if (!full_page) {
>  			write_aux_reg(aux_tag, paddr);
> -			paddr += L1_CACHE_BYTES;
> +			paddr += CACHEOPS_L1_LINE_SZ;
>  		}
>  
>  		write_aux_reg(aux_cmd, vaddr);
> -		vaddr += L1_CACHE_BYTES;
> +		vaddr += CACHEOPS_L1_LINE_SZ;
>  	}
>  }
>  
> @@ -372,11 +388,11 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
>  	 *  -@sz will be integral multiple of line size (being page sized).
>  	 */
>  	if (!full_page) {
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
>  	}
>  
> -	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
> +	num_lines = DIV_ROUND_UP(sz, CACHEOPS_L1_LINE_SZ);
>  
>  	/*
>  	 * For HS38 PAE40 configuration
> @@ -396,7 +412,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
>  
>  	while (num_lines-- > 0) {
>  		write_aux_reg(aux_cmd, paddr);
> -		paddr += L1_CACHE_BYTES;
> +		paddr += CACHEOPS_L1_LINE_SZ;
>  	}
>  }
>  
> @@ -422,14 +438,14 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
>  
>  	if (!full_page) {
>  		/* for any leading gap between @paddr and start of cache line */
> -		sz += paddr & ~CACHE_LINE_MASK;
> -		paddr &= CACHE_LINE_MASK;
> +		sz += paddr & ~CACHEOPS_L1_LINE_MASK;
> +		paddr &= CACHEOPS_L1_LINE_MASK;
>  
>  		/*
>  		 *  account for any trailing gap to end of cache line
>  		 *  this is equivalent to DIV_ROUND_UP() in line ops above
>  		 */
> -		sz += L1_CACHE_BYTES - 1;
> +		sz += CACHEOPS_L1_LINE_SZ - 1;
>  	}
>  
>  	if (is_pae40_enabled()) {
> @@ -1215,14 +1231,21 @@ static void arc_l1_line_check(unsigned int line_len, const char *cache_name)
>  		panic("%s support enabled but non-existent cache\n",
>  		      cache_name);
>  
> -	if (line_len != L1_CACHE_BYTES)
> +	/*
> +	 * With CONFIG_ARC_CACHE_LINE_AUTODETECT disabled we check that the
> +	 * cache line size equals the value provided via Kconfig; with it
> +	 * enabled we check that the cache line size is the same for every
> +	 * L1 (I/D) cache on every CPU.
> +	 */
> +	if (line_len != CACHEOPS_L1_LINE_SZ)
>  		panic("%s line size [%u] != expected [%u]",
> -		      cache_name, line_len, L1_CACHE_BYTES);
> +		      cache_name, line_len, CACHEOPS_L1_LINE_SZ);
>  }
>  #endif /* IS_ENABLED(CONFIG_ARC_HAS_ICACHE) || IS_ENABLED(CONFIG_ARC_HAS_DCACHE) */
>  
>  /*
>   * Cache related boot time checks needed on every CPU.
> + * NOTE: This function expects 'l1_line_sz' to be set.
>   */
>  static void arc_l1_cache_check(unsigned int cpu)
>  {
> @@ -1239,12 +1262,28 @@ static void arc_l1_cache_check(unsigned int cpu)
>   * configuration (we validate this in arc_cache_check()):
>   *  - Geometry checks
>   *  - L1 cache line loop callbacks
> + *  - l1_line_sz / l1_line_mask setup
>   */
>  void __init arc_l1_cache_init_master(unsigned int cpu)
>  {
> +	/*
> +	 * 'l1_line_sz' programming model:
> +	 * We simplify programming of 'l1_line_sz' by assuming that CPUs with
> +	 * different cache configurations are not supported.
> +	 * 1. Assign to 'l1_line_sz' the line length of either (I/D) L1 cache
> +	 *    of the master CPU.
> +	 * 2. Validate 'l1_line_sz' itself.
> +	 * 3. Check that both L1 I$/D$ line lengths on each CPU are equal to
> +	 *    'l1_line_sz' (or to the value provided via Kconfig when
> +	 *    CONFIG_ARC_CACHE_LINE_AUTODETECT is disabled). This is done in
> +	 *    arc_cache_check(), which is called for each CPU.
> +	 */
> +
>  	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
>  		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
>  
> +		l1_line_sz = ic->line_len;
> +
>  		/*
>  		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
>  		 * pair to provide vaddr/paddr respectively, just as in MMU v3
> @@ -1258,6 +1297,8 @@ void __init arc_l1_cache_init_master(unsigned int cpu)
>  	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
>  		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
>  
> +		l1_line_sz = dc->line_len;
> +
>  		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
>  		if (is_isa_arcompact()) {
>  			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
> @@ -1274,6 +1315,14 @@ void __init arc_l1_cache_init_master(unsigned int cpu)
>  		}
>  	}
>  
> +	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE) ||
> +	    IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
> +		if (l1_line_sz != 32 && l1_line_sz != 64 && l1_line_sz != 128)
> +			panic("L1 cache line sz [%u] unsupported\n", l1_line_sz);
> +
> +		l1_line_mask = ~(l1_line_sz - 1);
> +	}
> +
>  	/*
>  	 * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger
>  	 * or equal to any cache line length.
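
The mask set up at the end of the hunk above, l1_line_mask = ~(l1_line_sz - 1),
simply clears the offset-within-line bits. A worked example for the three
accepted sizes (not from the patch; plain C for illustration):

#include <stdio.h>

int main(void)
{
	unsigned int sizes[] = { 32, 64, 128 };	/* only these pass the panic() check */

	for (unsigned int i = 0; i < 3; i++) {
		unsigned int mask = ~(sizes[i] - 1);

		/* e.g. 64 -> 0xffffffc0: the low 6 (offset) bits are cleared */
		printf("line %3u: mask %#x\n", sizes[i], mask);
	}
	return 0;
}

paddr & mask rounds an address down to the start of its line, and
paddr & ~mask is the offset within the line that the loops above fold into sz.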

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-04-16 17:51 UTC | newest]

Thread overview: 12+ messages
2019-04-16 17:10 [PATCH 0/3] ARC: cache: allow to autodetect L1 cache line size Eugeniy Paltsev
2019-04-16 17:10 ` Eugeniy Paltsev
2019-04-16 17:10 ` [PATCH 1/3] ARC: cache: declare cache-related defines via ARC_MAX_CACHE_SHIFT Eugeniy Paltsev
2019-04-16 17:10   ` Eugeniy Paltsev
2019-04-16 17:10 ` [PATCH 2/3] ARC: cache: check cache configuration on each CPU Eugeniy Paltsev
2019-04-16 17:10   ` Eugeniy Paltsev
2019-04-16 17:32   ` Vineet Gupta
2019-04-16 17:32     ` Vineet Gupta
2019-04-16 17:10 ` [PATCH 3/3] ARC: cache: allow to autodetect L1 cache line size Eugeniy Paltsev
2019-04-16 17:10   ` Eugeniy Paltsev
2019-04-16 17:51   ` Vineet Gupta
2019-04-16 17:51     ` Vineet Gupta
