All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
@ 2012-12-06 12:08 ` Bastian Hecht
  0 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-06 12:08 UTC (permalink / raw)
  To: linux-arm-kernel

From: Bastian Hecht <hechtb@gmail.com>

When booting secondary CPUs we have used the main CPU to set up the
Snoop Control Unit flags of these CPUs. It is a cleaner approach
if every CPU takes care of its own flags. We avoid the need for
locking and the program logic is more concise. With this patch the file
headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
that sets up its own SCU flags.
Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
scu_power_mode(). This is possible as we don't cross borders anymore (every
CPU handles its own flags) and need no locking. So we can throw out the
needless function modify_scu_cpu_psr().

Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
---
 arch/arm/mach-shmobile/Makefile              |    2 +-
 arch/arm/mach-shmobile/headsmp-sh73a0.S      |   50 ++++++++++++++++++++++++++
 arch/arm/mach-shmobile/include/mach/common.h |    1 +
 arch/arm/mach-shmobile/smp-sh73a0.c          |   30 +++-------------
 4 files changed, 56 insertions(+), 27 deletions(-)
 create mode 100644 arch/arm/mach-shmobile/headsmp-sh73a0.S

diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index fe2c97c..a7beb61 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_ARCH_EMEV2)	+= setup-emev2.o clock-emev2.o
 # SMP objects
 smp-y				:= platsmp.o headsmp.o
 smp-$(CONFIG_HOTPLUG_CPU)	+= hotplug.o
-smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o
+smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-sh73a0.o
 smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o
 smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o
 
diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
new file mode 100644
index 0000000..bec4c0d
--- /dev/null
+++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
@@ -0,0 +1,50 @@
+/*
+ * SMP support for SoC sh73a0
+ *
+ * Copyright (C) 2012 Bastian Hecht
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+
+	__CPUINIT
+/*
+ * Reset vector for secondary CPUs.
+ *
+ * First we turn on L1 cache coherency for our CPU. Then we jump to
+ * shmobile_invalidate_start that invalidates the cache and hands over control
+ * to the common ARM startup code.
+ * This function will be mapped to address 0 by the SBAR register.
+ * A normal branch is out of range here so we need a long jump. We jump to
+ * the physical address as the MMU is still turned off.
+ */
+	.align	12
+ENTRY(sh73a0_secondary_vector)
+	mrc     p15, 0, r0, c0, c0, 5	@ read MIPDR
+	and	r0, r0, #3		@ mask out cpu ID
+	lsl	r0, r0, #3		@ we will shift by cpu_id * 8 bits
+	mov	r1, #0xf0000000		@ SCU base address
+	ldr	r2, [r1, #8]		@ SCU Power Status Register
+	mov	r3, #3
+	bic	r2, r2, r3, lsl r0	@ Clear bits of our CPU (Run Mode)
+	str	r2, [r1, #8]		@ write back
+
+	ldr	pc, 1f
+1:	.long shmobile_invalidate_start - PAGE_OFFSET + PLAT_PHYS_OFFSET
+ENDPROC(sh73a0_secondary_vector)
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index d47e215..f2e2c29 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -54,6 +54,7 @@ extern void sh73a0_add_early_devices(void);
 extern void sh73a0_add_standard_devices(void);
 extern void sh73a0_clock_init(void);
 extern void sh73a0_pinmux_init(void);
+extern void sh73a0_secondary_vector(void);
 extern struct clk sh73a0_extal1_clk;
 extern struct clk sh73a0_extal2_clk;
 extern struct clk sh73a0_extcki_clk;
diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
index 624f00f..5e36f5d 100644
--- a/arch/arm/mach-shmobile/smp-sh73a0.c
+++ b/arch/arm/mach-shmobile/smp-sh73a0.c
@@ -41,9 +41,6 @@ static void __iomem *scu_base_addr(void)
 	return (void __iomem *)0xf0000000;
 }
 
-static DEFINE_SPINLOCK(scu_lock);
-static unsigned long tmp;
-
 #ifdef CONFIG_HAVE_ARM_TWD
 static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, 0xf0000600, 29);
 void __init sh73a0_register_twd(void)
@@ -52,20 +49,6 @@ void __init sh73a0_register_twd(void)
 }
 #endif
 
-static void modify_scu_cpu_psr(unsigned long set, unsigned long clr)
-{
-	void __iomem *scu_base = scu_base_addr();
-
-	spin_lock(&scu_lock);
-	tmp = __raw_readl(scu_base + 8);
-	tmp &= ~clr;
-	tmp |= set;
-	spin_unlock(&scu_lock);
-
-	/* disable cache coherency after releasing the lock */
-	__raw_writel(tmp, scu_base + 8);
-}
-
 static unsigned int __init sh73a0_get_core_count(void)
 {
 	void __iomem *scu_base = scu_base_addr();
@@ -82,9 +65,6 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
 {
 	cpu = cpu_logical_map(cpu);
 
-	/* enable cache coherency */
-	modify_scu_cpu_psr(0, 3 << (cpu * 8));
-
 	if (((__raw_readl(PSTR) >> (4 * cpu)) & 3) = 3)
 		__raw_writel(1 << cpu, WUPCR);	/* wake up */
 	else
@@ -95,16 +75,14 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
 
 static void __init sh73a0_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int cpu = cpu_logical_map(0);
-
 	scu_enable(scu_base_addr());
 
-	/* Map the reset vector (in headsmp.S) */
+	/* Map the reset vector (in headsmp-sh73a0.S) */
 	__raw_writel(0, APARMBAREA);      /* 4k */
-	__raw_writel(__pa(shmobile_secondary_vector), SBAR);
+	__raw_writel(__pa(sh73a0_secondary_vector), SBAR);
 
-	/* enable cache coherency on CPU0 */
-	modify_scu_cpu_psr(0, 3 << (cpu * 8));
+	/* enable cache coherency on booting CPU */
+	scu_power_mode(scu_base_addr(), SCU_PM_NORMAL);
 }
 
 static void __init sh73a0_smp_init_cpus(void)
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
@ 2012-12-06 12:08 ` Bastian Hecht
  0 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-06 12:08 UTC (permalink / raw)
  To: linux-arm-kernel

From: Bastian Hecht <hechtb@gmail.com>

When booting secondary CPUs we have used the main CPU to set up the
Snoop Control Unit flags of these CPUs. It is a cleaner approach
if every CPU takes care of its own flags. We avoid the need for
locking and the program logic is more concise. With this patch the file
headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
that sets up its own SCU flags.
Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
scu_power_mode(). This is possible as we don't cross borders anymore (every
CPU handles its own flags) and need no locking. So we can throw out the
needless function modify_scu_cpu_psr().

Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
---
 arch/arm/mach-shmobile/Makefile              |    2 +-
 arch/arm/mach-shmobile/headsmp-sh73a0.S      |   50 ++++++++++++++++++++++++++
 arch/arm/mach-shmobile/include/mach/common.h |    1 +
 arch/arm/mach-shmobile/smp-sh73a0.c          |   30 +++-------------
 4 files changed, 56 insertions(+), 27 deletions(-)
 create mode 100644 arch/arm/mach-shmobile/headsmp-sh73a0.S

diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index fe2c97c..a7beb61 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_ARCH_EMEV2)	+= setup-emev2.o clock-emev2.o
 # SMP objects
 smp-y				:= platsmp.o headsmp.o
 smp-$(CONFIG_HOTPLUG_CPU)	+= hotplug.o
-smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o
+smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-sh73a0.o
 smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o
 smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o
 
diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
new file mode 100644
index 0000000..bec4c0d
--- /dev/null
+++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
@@ -0,0 +1,50 @@
+/*
+ * SMP support for SoC sh73a0
+ *
+ * Copyright (C) 2012 Bastian Hecht
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+
+	__CPUINIT
+/*
+ * Reset vector for secondary CPUs.
+ *
+ * First we turn on L1 cache coherency for our CPU. Then we jump to
+ * shmobile_invalidate_start that invalidates the cache and hands over control
+ * to the common ARM startup code.
+ * This function will be mapped to address 0 by the SBAR register.
+ * A normal branch is out of range here so we need a long jump. We jump to
+ * the physical address as the MMU is still turned off.
+ */
+	.align	12
+ENTRY(sh73a0_secondary_vector)
+	mrc     p15, 0, r0, c0, c0, 5	@ read MIPDR
+	and	r0, r0, #3		@ mask out cpu ID
+	lsl	r0, r0, #3		@ we will shift by cpu_id * 8 bits
+	mov	r1, #0xf0000000		@ SCU base address
+	ldr	r2, [r1, #8]		@ SCU Power Status Register
+	mov	r3, #3
+	bic	r2, r2, r3, lsl r0	@ Clear bits of our CPU (Run Mode)
+	str	r2, [r1, #8]		@ write back
+
+	ldr	pc, 1f
+1:	.long shmobile_invalidate_start - PAGE_OFFSET + PLAT_PHYS_OFFSET
+ENDPROC(sh73a0_secondary_vector)
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index d47e215..f2e2c29 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -54,6 +54,7 @@ extern void sh73a0_add_early_devices(void);
 extern void sh73a0_add_standard_devices(void);
 extern void sh73a0_clock_init(void);
 extern void sh73a0_pinmux_init(void);
+extern void sh73a0_secondary_vector(void);
 extern struct clk sh73a0_extal1_clk;
 extern struct clk sh73a0_extal2_clk;
 extern struct clk sh73a0_extcki_clk;
diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
index 624f00f..5e36f5d 100644
--- a/arch/arm/mach-shmobile/smp-sh73a0.c
+++ b/arch/arm/mach-shmobile/smp-sh73a0.c
@@ -41,9 +41,6 @@ static void __iomem *scu_base_addr(void)
 	return (void __iomem *)0xf0000000;
 }
 
-static DEFINE_SPINLOCK(scu_lock);
-static unsigned long tmp;
-
 #ifdef CONFIG_HAVE_ARM_TWD
 static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, 0xf0000600, 29);
 void __init sh73a0_register_twd(void)
@@ -52,20 +49,6 @@ void __init sh73a0_register_twd(void)
 }
 #endif
 
-static void modify_scu_cpu_psr(unsigned long set, unsigned long clr)
-{
-	void __iomem *scu_base = scu_base_addr();
-
-	spin_lock(&scu_lock);
-	tmp = __raw_readl(scu_base + 8);
-	tmp &= ~clr;
-	tmp |= set;
-	spin_unlock(&scu_lock);
-
-	/* disable cache coherency after releasing the lock */
-	__raw_writel(tmp, scu_base + 8);
-}
-
 static unsigned int __init sh73a0_get_core_count(void)
 {
 	void __iomem *scu_base = scu_base_addr();
@@ -82,9 +65,6 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
 {
 	cpu = cpu_logical_map(cpu);
 
-	/* enable cache coherency */
-	modify_scu_cpu_psr(0, 3 << (cpu * 8));
-
 	if (((__raw_readl(PSTR) >> (4 * cpu)) & 3) == 3)
 		__raw_writel(1 << cpu, WUPCR);	/* wake up */
 	else
@@ -95,16 +75,14 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
 
 static void __init sh73a0_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int cpu = cpu_logical_map(0);
-
 	scu_enable(scu_base_addr());
 
-	/* Map the reset vector (in headsmp.S) */
+	/* Map the reset vector (in headsmp-sh73a0.S) */
 	__raw_writel(0, APARMBAREA);      /* 4k */
-	__raw_writel(__pa(shmobile_secondary_vector), SBAR);
+	__raw_writel(__pa(sh73a0_secondary_vector), SBAR);
 
-	/* enable cache coherency on CPU0 */
-	modify_scu_cpu_psr(0, 3 << (cpu * 8));
+	/* enable cache coherency on booting CPU */
+	scu_power_mode(scu_base_addr(), SCU_PM_NORMAL);
 }
 
 static void __init sh73a0_smp_init_cpus(void)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
  2012-12-06 12:08 ` Bastian Hecht
@ 2012-12-06 12:08   ` Bastian Hecht
  -1 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-06 12:08 UTC (permalink / raw)
  To: linux-arm-kernel

From: Bastian Hecht <hechtb@gmail.com>

Add the capability to add and remove CPUs on the fly.
The Cortex-A9 offers the possibility to take single cores out of the
MP Core. We add this capabilty taking care that caches are kept
coherent. For the actual shutdown via a WFI instruction, a code snippet
from the omap2 code tree is copied. Thanks for that! For verifying the
shutdown we rely on the internal SH73A0 Power Status Register
PSTR.

Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
---
 arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
 arch/arm/mach-shmobile/include/mach/common.h |    1 +
 arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
 3 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
index bec4c0d..be463a3 100644
--- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
+++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
@@ -23,6 +23,52 @@
 #include <linux/init.h>
 #include <asm/memory.h>
 
+/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
+ENTRY(sh73a0_do_wfi)
+        stmfd   sp!, {lr}
+
+        /*
+         * Execute an ISB instruction to ensure that all of the
+         * CP15 register changes have been committed.
+         */
+        isb
+
+        /*
+         * Execute a barrier instruction to ensure that all cache,
+         * TLB and branch predictor maintenance operations issued
+         * by any CPU in the cluster have completed.
+         */
+        dsb
+        dmb
+
+        /*
+         * Execute a WFI instruction and wait until the
+         * STANDBYWFI output is asserted to indicate that the
+         * CPU is in idle and low power state. CPU can specualatively
+         * prefetch the instructions so add NOPs after WFI. Sixteen
+         * NOPs as per Cortex-A9 pipeline.
+         */
+        wfi                                     @ Wait For Interrupt
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+
+        ldmfd   sp!, {pc}
+ENDPROC(sh73a0_do_wfi)
+
 	__CPUINIT
 /*
  * Reset vector for secondary CPUs.
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index f2e2c29..40f767e 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -55,6 +55,7 @@ extern void sh73a0_add_standard_devices(void);
 extern void sh73a0_clock_init(void);
 extern void sh73a0_pinmux_init(void);
 extern void sh73a0_secondary_vector(void);
+extern void sh73a0_do_wfi(void);
 extern struct clk sh73a0_extal1_clk;
 extern struct clk sh73a0_extal2_clk;
 extern struct clk sh73a0_extcki_clk;
diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
index 5e36f5d..9237e13 100644
--- a/arch/arm/mach-shmobile/smp-sh73a0.c
+++ b/arch/arm/mach-shmobile/smp-sh73a0.c
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <mach/common.h>
+#include <asm/cacheflush.h>
 #include <asm/smp_plat.h>
 #include <mach/sh73a0.h>
 #include <asm/smp_scu.h>
@@ -36,6 +37,8 @@
 #define SBAR		IOMEM(0xe6180020)
 #define APARMBAREA	IOMEM(0xe6f10020)
 
+#define PSTR_SHUTDOWN_MODE	3
+
 static void __iomem *scu_base_addr(void)
 {
 	return (void __iomem *)0xf0000000;
@@ -92,16 +95,20 @@ static void __init sh73a0_smp_init_cpus(void)
 	shmobile_smp_init_cpus(ncores);
 }
 
-static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static int sh73a0_cpu_kill(unsigned int cpu)
 {
+
 	int k;
+	u32 pstr;
 
-	/* this function is running on another CPU than the offline target,
-	 * here we need wait for shutdown code in platform_cpu_die() to
-	 * finish before asking SoC-specific code to power off the CPU core.
+	/*
+	 * wait until the power status register confirms the shutdown of the
+	 * offline target
 	 */
 	for (k = 0; k < 1000; k++) {
-		if (shmobile_cpu_is_dead(cpu))
+		pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
+		if (pstr = PSTR_SHUTDOWN_MODE)
 			return 1;
 
 		mdelay(1);
@@ -110,6 +117,28 @@ static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
 	return 0;
 }
 
+static void sh73a0_cpu_die(unsigned int cpu)
+{
+	/*
+	 * The ARM MPcore does not issue a cache coherency request for the L1
+	 * cache when powering off single CPUs. We must take care of this and
+	 * further caches.
+	 */
+	dsb();
+	flush_cache_all();
+
+	/* Set power off mode. This takes the CPU out of the MP cluster */
+	scu_power_mode(scu_base_addr(), SCU_PM_POWEROFF);
+
+	/* Enter shutdown mode */
+	sh73a0_do_wfi();
+
+	/* We assume success always. We never reach this */
+	pr_err("Shutting down CPU failed. This should never happen!\n");
+	for (;;)
+		;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 
 struct smp_operations sh73a0_smp_ops __initdata = {
 	.smp_init_cpus		= sh73a0_smp_init_cpus,
@@ -118,7 +147,7 @@ struct smp_operations sh73a0_smp_ops __initdata = {
 	.smp_boot_secondary	= sh73a0_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_kill		= sh73a0_cpu_kill,
-	.cpu_die		= shmobile_cpu_die,
+	.cpu_die		= sh73a0_cpu_die,
 	.cpu_disable		= shmobile_cpu_disable,
 #endif
 };
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
@ 2012-12-06 12:08   ` Bastian Hecht
  0 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-06 12:08 UTC (permalink / raw)
  To: linux-arm-kernel

From: Bastian Hecht <hechtb@gmail.com>

Add the capability to add and remove CPUs on the fly.
The Cortex-A9 offers the possibility to take single cores out of the
MP Core. We add this capabilty taking care that caches are kept
coherent. For the actual shutdown via a WFI instruction, a code snippet
from the omap2 code tree is copied. Thanks for that! For verifying the
shutdown we rely on the internal SH73A0 Power Status Register
PSTR.

Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
---
 arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
 arch/arm/mach-shmobile/include/mach/common.h |    1 +
 arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
 3 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
index bec4c0d..be463a3 100644
--- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
+++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
@@ -23,6 +23,52 @@
 #include <linux/init.h>
 #include <asm/memory.h>
 
+/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
+ENTRY(sh73a0_do_wfi)
+        stmfd   sp!, {lr}
+
+        /*
+         * Execute an ISB instruction to ensure that all of the
+         * CP15 register changes have been committed.
+         */
+        isb
+
+        /*
+         * Execute a barrier instruction to ensure that all cache,
+         * TLB and branch predictor maintenance operations issued
+         * by any CPU in the cluster have completed.
+         */
+        dsb
+        dmb
+
+        /*
+         * Execute a WFI instruction and wait until the
+         * STANDBYWFI output is asserted to indicate that the
+         * CPU is in idle and low power state. CPU can specualatively
+         * prefetch the instructions so add NOPs after WFI. Sixteen
+         * NOPs as per Cortex-A9 pipeline.
+         */
+        wfi                                     @ Wait For Interrupt
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+        nop
+
+        ldmfd   sp!, {pc}
+ENDPROC(sh73a0_do_wfi)
+
 	__CPUINIT
 /*
  * Reset vector for secondary CPUs.
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index f2e2c29..40f767e 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -55,6 +55,7 @@ extern void sh73a0_add_standard_devices(void);
 extern void sh73a0_clock_init(void);
 extern void sh73a0_pinmux_init(void);
 extern void sh73a0_secondary_vector(void);
+extern void sh73a0_do_wfi(void);
 extern struct clk sh73a0_extal1_clk;
 extern struct clk sh73a0_extal2_clk;
 extern struct clk sh73a0_extcki_clk;
diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
index 5e36f5d..9237e13 100644
--- a/arch/arm/mach-shmobile/smp-sh73a0.c
+++ b/arch/arm/mach-shmobile/smp-sh73a0.c
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <mach/common.h>
+#include <asm/cacheflush.h>
 #include <asm/smp_plat.h>
 #include <mach/sh73a0.h>
 #include <asm/smp_scu.h>
@@ -36,6 +37,8 @@
 #define SBAR		IOMEM(0xe6180020)
 #define APARMBAREA	IOMEM(0xe6f10020)
 
+#define PSTR_SHUTDOWN_MODE	3
+
 static void __iomem *scu_base_addr(void)
 {
 	return (void __iomem *)0xf0000000;
@@ -92,16 +95,20 @@ static void __init sh73a0_smp_init_cpus(void)
 	shmobile_smp_init_cpus(ncores);
 }
 
-static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static int sh73a0_cpu_kill(unsigned int cpu)
 {
+
 	int k;
+	u32 pstr;
 
-	/* this function is running on another CPU than the offline target,
-	 * here we need wait for shutdown code in platform_cpu_die() to
-	 * finish before asking SoC-specific code to power off the CPU core.
+	/*
+	 * wait until the power status register confirms the shutdown of the
+	 * offline target
 	 */
 	for (k = 0; k < 1000; k++) {
-		if (shmobile_cpu_is_dead(cpu))
+		pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
+		if (pstr == PSTR_SHUTDOWN_MODE)
 			return 1;
 
 		mdelay(1);
@@ -110,6 +117,28 @@ static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
 	return 0;
 }
 
+static void sh73a0_cpu_die(unsigned int cpu)
+{
+	/*
+	 * The ARM MPcore does not issue a cache coherency request for the L1
+	 * cache when powering off single CPUs. We must take care of this and
+	 * further caches.
+	 */
+	dsb();
+	flush_cache_all();
+
+	/* Set power off mode. This takes the CPU out of the MP cluster */
+	scu_power_mode(scu_base_addr(), SCU_PM_POWEROFF);
+
+	/* Enter shutdown mode */
+	sh73a0_do_wfi();
+
+	/* We assume success always. We never reach this */
+	pr_err("Shutting down CPU failed. This should never happen!\n");
+	for (;;)
+		;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 
 struct smp_operations sh73a0_smp_ops __initdata = {
 	.smp_init_cpus		= sh73a0_smp_init_cpus,
@@ -118,7 +147,7 @@ struct smp_operations sh73a0_smp_ops __initdata = {
 	.smp_boot_secondary	= sh73a0_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_kill		= sh73a0_cpu_kill,
-	.cpu_die		= shmobile_cpu_die,
+	.cpu_die		= sh73a0_cpu_die,
 	.cpu_disable		= shmobile_cpu_disable,
 #endif
 };
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
  2012-12-06 12:08 ` Bastian Hecht
@ 2012-12-14  3:20   ` Magnus Damm
  -1 siblings, 0 replies; 22+ messages in thread
From: Magnus Damm @ 2012-12-14  3:20 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Dec 6, 2012 at 9:08 PM, Bastian Hecht <hechtb@gmail.com> wrote:
> From: Bastian Hecht <hechtb@gmail.com>
>
> When booting secondary CPUs we have used the main CPU to set up the
> Snoop Control Unit flags of these CPUs. It is a cleaner approach
> if every CPU takes care of its own flags. We avoid the need for
> locking and the program logic is more concise. With this patch the file
> headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
> that sets up its own SCU flags.
> Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
> scu_power_mode(). This is possible as we don't cross borders anymore (every
> CPU handles its own flags) and need no locking. So we can throw out the
> needless function modify_scu_cpu_psr().
>
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>

Acked-by: Magnus Damm <damm@opensource.se>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
@ 2012-12-14  3:20   ` Magnus Damm
  0 siblings, 0 replies; 22+ messages in thread
From: Magnus Damm @ 2012-12-14  3:20 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Dec 6, 2012 at 9:08 PM, Bastian Hecht <hechtb@gmail.com> wrote:
> From: Bastian Hecht <hechtb@gmail.com>
>
> When booting secondary CPUs we have used the main CPU to set up the
> Snoop Control Unit flags of these CPUs. It is a cleaner approach
> if every CPU takes care of its own flags. We avoid the need for
> locking and the program logic is more concise. With this patch the file
> headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
> that sets up its own SCU flags.
> Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
> scu_power_mode(). This is possible as we don't cross borders anymore (every
> CPU handles its own flags) and need no locking. So we can throw out the
> needless function modify_scu_cpu_psr().
>
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>

Acked-by: Magnus Damm <damm@opensource.se>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
  2012-12-06 12:08   ` Bastian Hecht
@ 2012-12-14  3:21     ` Magnus Damm
  -1 siblings, 0 replies; 22+ messages in thread
From: Magnus Damm @ 2012-12-14  3:21 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Dec 6, 2012 at 9:08 PM, Bastian Hecht <hechtb@gmail.com> wrote:
> From: Bastian Hecht <hechtb@gmail.com>
>
> Add the capability to add and remove CPUs on the fly.
> The Cortex-A9 offers the possibility to take single cores out of the
> MP Core. We add this capabilty taking care that caches are kept
> coherent. For the actual shutdown via a WFI instruction, a code snippet
> from the omap2 code tree is copied. Thanks for that! For verifying the
> shutdown we rely on the internal SH73A0 Power Status Register
> PSTR.
>
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>

Acked-by: Magnus Damm <damm@opensource.se>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
@ 2012-12-14  3:21     ` Magnus Damm
  0 siblings, 0 replies; 22+ messages in thread
From: Magnus Damm @ 2012-12-14  3:21 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Dec 6, 2012 at 9:08 PM, Bastian Hecht <hechtb@gmail.com> wrote:
> From: Bastian Hecht <hechtb@gmail.com>
>
> Add the capability to add and remove CPUs on the fly.
> The Cortex-A9 offers the possibility to take single cores out of the
> MP Core. We add this capabilty taking care that caches are kept
> coherent. For the actual shutdown via a WFI instruction, a code snippet
> from the omap2 code tree is copied. Thanks for that! For verifying the
> shutdown we rely on the internal SH73A0 Power Status Register
> PSTR.
>
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>

Acked-by: Magnus Damm <damm@opensource.se>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
  2012-12-14  3:20   ` Magnus Damm
@ 2012-12-14 13:47     ` Simon Horman
  -1 siblings, 0 replies; 22+ messages in thread
From: Simon Horman @ 2012-12-14 13:47 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Dec 14, 2012 at 12:20:29PM +0900, Magnus Damm wrote:
> On Thu, Dec 6, 2012 at 9:08 PM, Bastian Hecht <hechtb@gmail.com> wrote:
> > From: Bastian Hecht <hechtb@gmail.com>
> >
> > When booting secondary CPUs we have used the main CPU to set up the
> > Snoop Control Unit flags of these CPUs. It is a cleaner approach
> > if every CPU takes care of its own flags. We avoid the need for
> > locking and the program logic is more concise. With this patch the file
> > headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
> > that sets up its own SCU flags.
> > Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
> > scu_power_mode(). This is possible as we don't cross borders anymore (every
> > CPU handles its own flags) and need no locking. So we can throw out the
> > needless function modify_scu_cpu_psr().
> >
> > Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> 
> Acked-by: Magnus Damm <damm@opensource.se>

Hi Bastian,

could you please re-spin this series on top of the soc5 or next
branches of my renesas tree on kernel.org?

Feel free to include Magnus's Ack unless you make any
non-trivial changes.

Thanks:wq

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
@ 2012-12-14 13:47     ` Simon Horman
  0 siblings, 0 replies; 22+ messages in thread
From: Simon Horman @ 2012-12-14 13:47 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Dec 14, 2012 at 12:20:29PM +0900, Magnus Damm wrote:
> On Thu, Dec 6, 2012 at 9:08 PM, Bastian Hecht <hechtb@gmail.com> wrote:
> > From: Bastian Hecht <hechtb@gmail.com>
> >
> > When booting secondary CPUs we have used the main CPU to set up the
> > Snoop Control Unit flags of these CPUs. It is a cleaner approach
> > if every CPU takes care of its own flags. We avoid the need for
> > locking and the program logic is more concise. With this patch the file
> > headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
> > that sets up its own SCU flags.
> > Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
> > scu_power_mode(). This is possible as we don't cross borders anymore (every
> > CPU handles its own flags) and need no locking. So we can throw out the
> > needless function modify_scu_cpu_psr().
> >
> > Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> 
> Acked-by: Magnus Damm <damm@opensource.se>

Hi Bastian,

could you please re-spin this series on top of the soc5 or next
branches of my renesas tree on kernel.org?

Feel free to include Magnus's Ack unless you make any
non-trivial changes.

Thanks:wq

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
  2012-12-06 12:08 ` Bastian Hecht
@ 2012-12-14 14:06   ` Rob Herring
  -1 siblings, 0 replies; 22+ messages in thread
From: Rob Herring @ 2012-12-14 14:06 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/06/2012 06:08 AM, Bastian Hecht wrote:
> From: Bastian Hecht <hechtb@gmail.com>
> 
> When booting secondary CPUs we have used the main CPU to set up the
> Snoop Control Unit flags of these CPUs. It is a cleaner approach
> if every CPU takes care of its own flags. We avoid the need for
> locking and the program logic is more concise. With this patch the file
> headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
> that sets up its own SCU flags.
> Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
> scu_power_mode(). This is possible as we don't cross borders anymore (every
> CPU handles its own flags) and need no locking. So we can throw out the
> needless function modify_scu_cpu_psr().
> 
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> ---
>  arch/arm/mach-shmobile/Makefile              |    2 +-
>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   50 ++++++++++++++++++++++++++
>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>  arch/arm/mach-shmobile/smp-sh73a0.c          |   30 +++-------------
>  4 files changed, 56 insertions(+), 27 deletions(-)
>  create mode 100644 arch/arm/mach-shmobile/headsmp-sh73a0.S
> 
> diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
> index fe2c97c..a7beb61 100644
> --- a/arch/arm/mach-shmobile/Makefile
> +++ b/arch/arm/mach-shmobile/Makefile
> @@ -17,7 +17,7 @@ obj-$(CONFIG_ARCH_EMEV2)	+= setup-emev2.o clock-emev2.o
>  # SMP objects
>  smp-y				:= platsmp.o headsmp.o
>  smp-$(CONFIG_HOTPLUG_CPU)	+= hotplug.o
> -smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o
> +smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-sh73a0.o
>  smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o
>  smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o
>  
> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> new file mode 100644
> index 0000000..bec4c0d
> --- /dev/null
> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> @@ -0,0 +1,50 @@
> +/*
> + * SMP support for SoC sh73a0
> + *
> + * Copyright (C) 2012 Bastian Hecht
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +#include <asm/memory.h>
> +
> +	__CPUINIT
> +/*
> + * Reset vector for secondary CPUs.
> + *
> + * First we turn on L1 cache coherency for our CPU. Then we jump to
> + * shmobile_invalidate_start that invalidates the cache and hands over control
> + * to the common ARM startup code.
> + * This function will be mapped to address 0 by the SBAR register.
> + * A normal branch is out of range here so we need a long jump. We jump to
> + * the physical address as the MMU is still turned off.
> + */
> +	.align	12
> +ENTRY(sh73a0_secondary_vector)
> +	mrc     p15, 0, r0, c0, c0, 5	@ read MIPDR
> +	and	r0, r0, #3		@ mask out cpu ID
> +	lsl	r0, r0, #3		@ we will shift by cpu_id * 8 bits
> +	mov	r1, #0xf0000000		@ SCU base address


> +	ldr	r2, [r1, #8]		@ SCU Power Status Register
> +	mov	r3, #3
> +	bic	r2, r2, r3, lsl r0	@ Clear bits of our CPU (Run Mode)
> +	str	r2, [r1, #8]		@ write back
> +
> +	ldr	pc, 1f
> +1:	.long shmobile_invalidate_start - PAGE_OFFSET + PLAT_PHYS_OFFSET
> +ENDPROC(sh73a0_secondary_vector)
> diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
> index d47e215..f2e2c29 100644
> --- a/arch/arm/mach-shmobile/include/mach/common.h
> +++ b/arch/arm/mach-shmobile/include/mach/common.h
> @@ -54,6 +54,7 @@ extern void sh73a0_add_early_devices(void);
>  extern void sh73a0_add_standard_devices(void);
>  extern void sh73a0_clock_init(void);
>  extern void sh73a0_pinmux_init(void);
> +extern void sh73a0_secondary_vector(void);
>  extern struct clk sh73a0_extal1_clk;
>  extern struct clk sh73a0_extal2_clk;
>  extern struct clk sh73a0_extcki_clk;
> diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
> index 624f00f..5e36f5d 100644
> --- a/arch/arm/mach-shmobile/smp-sh73a0.c
> +++ b/arch/arm/mach-shmobile/smp-sh73a0.c
> @@ -41,9 +41,6 @@ static void __iomem *scu_base_addr(void)
>  	return (void __iomem *)0xf0000000;
>  }
>  
> -static DEFINE_SPINLOCK(scu_lock);
> -static unsigned long tmp;
> -
>  #ifdef CONFIG_HAVE_ARM_TWD
>  static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, 0xf0000600, 29);
>  void __init sh73a0_register_twd(void)
> @@ -52,20 +49,6 @@ void __init sh73a0_register_twd(void)
>  }
>  #endif
>  
> -static void modify_scu_cpu_psr(unsigned long set, unsigned long clr)
> -{
> -	void __iomem *scu_base = scu_base_addr();
> -
> -	spin_lock(&scu_lock);
> -	tmp = __raw_readl(scu_base + 8);
> -	tmp &= ~clr;
> -	tmp |= set;
> -	spin_unlock(&scu_lock);
> -
> -	/* disable cache coherency after releasing the lock */
> -	__raw_writel(tmp, scu_base + 8);

None of this locking was needed as the power status register is byte
accessible.

> -}
> -
>  static unsigned int __init sh73a0_get_core_count(void)
>  {
>  	void __iomem *scu_base = scu_base_addr();
> @@ -82,9 +65,6 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
>  {
>  	cpu = cpu_logical_map(cpu);
>  
> -	/* enable cache coherency */
> -	modify_scu_cpu_psr(0, 3 << (cpu * 8));
> -

So simply changing this to scu_power_mode call would accomplish the same
thing and avoid all the assembly.

Rob

>  	if (((__raw_readl(PSTR) >> (4 * cpu)) & 3) = 3)
>  		__raw_writel(1 << cpu, WUPCR);	/* wake up */
>  	else
> @@ -95,16 +75,14 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
>  
>  static void __init sh73a0_smp_prepare_cpus(unsigned int max_cpus)
>  {
> -	int cpu = cpu_logical_map(0);
> -
>  	scu_enable(scu_base_addr());
>  
> -	/* Map the reset vector (in headsmp.S) */
> +	/* Map the reset vector (in headsmp-sh73a0.S) */
>  	__raw_writel(0, APARMBAREA);      /* 4k */
> -	__raw_writel(__pa(shmobile_secondary_vector), SBAR);
> +	__raw_writel(__pa(sh73a0_secondary_vector), SBAR);
>  
> -	/* enable cache coherency on CPU0 */
> -	modify_scu_cpu_psr(0, 3 << (cpu * 8));
> +	/* enable cache coherency on booting CPU */
> +	scu_power_mode(scu_base_addr(), SCU_PM_NORMAL);
>  }
>  
>  static void __init sh73a0_smp_init_cpus(void)
> 


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
@ 2012-12-14 14:06   ` Rob Herring
  0 siblings, 0 replies; 22+ messages in thread
From: Rob Herring @ 2012-12-14 14:06 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/06/2012 06:08 AM, Bastian Hecht wrote:
> From: Bastian Hecht <hechtb@gmail.com>
> 
> When booting secondary CPUs we have used the main CPU to set up the
> Snoop Control Unit flags of these CPUs. It is a cleaner approach
> if every CPU takes care of its own flags. We avoid the need for
> locking and the program logic is more concise. With this patch the file
> headsmp-sh73a0.S is added that contains a startup vector for secondary CPUs
> that sets up its own SCU flags.
> Further in sh73a0_smp_prepare_cpus() we can rely on the generic ARM helper
> scu_power_mode(). This is possible as we don't cross borders anymore (every
> CPU handles its own flags) and need no locking. So we can throw out the
> needless function modify_scu_cpu_psr().
> 
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> ---
>  arch/arm/mach-shmobile/Makefile              |    2 +-
>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   50 ++++++++++++++++++++++++++
>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>  arch/arm/mach-shmobile/smp-sh73a0.c          |   30 +++-------------
>  4 files changed, 56 insertions(+), 27 deletions(-)
>  create mode 100644 arch/arm/mach-shmobile/headsmp-sh73a0.S
> 
> diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
> index fe2c97c..a7beb61 100644
> --- a/arch/arm/mach-shmobile/Makefile
> +++ b/arch/arm/mach-shmobile/Makefile
> @@ -17,7 +17,7 @@ obj-$(CONFIG_ARCH_EMEV2)	+= setup-emev2.o clock-emev2.o
>  # SMP objects
>  smp-y				:= platsmp.o headsmp.o
>  smp-$(CONFIG_HOTPLUG_CPU)	+= hotplug.o
> -smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o
> +smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-sh73a0.o
>  smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o
>  smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o
>  
> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> new file mode 100644
> index 0000000..bec4c0d
> --- /dev/null
> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> @@ -0,0 +1,50 @@
> +/*
> + * SMP support for SoC sh73a0
> + *
> + * Copyright (C) 2012 Bastian Hecht
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +#include <asm/memory.h>
> +
> +	__CPUINIT
> +/*
> + * Reset vector for secondary CPUs.
> + *
> + * First we turn on L1 cache coherency for our CPU. Then we jump to
> + * shmobile_invalidate_start that invalidates the cache and hands over control
> + * to the common ARM startup code.
> + * This function will be mapped to address 0 by the SBAR register.
> + * A normal branch is out of range here so we need a long jump. We jump to
> + * the physical address as the MMU is still turned off.
> + */
> +	.align	12
> +ENTRY(sh73a0_secondary_vector)
> +	mrc     p15, 0, r0, c0, c0, 5	@ read MIPDR
> +	and	r0, r0, #3		@ mask out cpu ID
> +	lsl	r0, r0, #3		@ we will shift by cpu_id * 8 bits
> +	mov	r1, #0xf0000000		@ SCU base address


> +	ldr	r2, [r1, #8]		@ SCU Power Status Register
> +	mov	r3, #3
> +	bic	r2, r2, r3, lsl r0	@ Clear bits of our CPU (Run Mode)
> +	str	r2, [r1, #8]		@ write back
> +
> +	ldr	pc, 1f
> +1:	.long shmobile_invalidate_start - PAGE_OFFSET + PLAT_PHYS_OFFSET
> +ENDPROC(sh73a0_secondary_vector)
> diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
> index d47e215..f2e2c29 100644
> --- a/arch/arm/mach-shmobile/include/mach/common.h
> +++ b/arch/arm/mach-shmobile/include/mach/common.h
> @@ -54,6 +54,7 @@ extern void sh73a0_add_early_devices(void);
>  extern void sh73a0_add_standard_devices(void);
>  extern void sh73a0_clock_init(void);
>  extern void sh73a0_pinmux_init(void);
> +extern void sh73a0_secondary_vector(void);
>  extern struct clk sh73a0_extal1_clk;
>  extern struct clk sh73a0_extal2_clk;
>  extern struct clk sh73a0_extcki_clk;
> diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
> index 624f00f..5e36f5d 100644
> --- a/arch/arm/mach-shmobile/smp-sh73a0.c
> +++ b/arch/arm/mach-shmobile/smp-sh73a0.c
> @@ -41,9 +41,6 @@ static void __iomem *scu_base_addr(void)
>  	return (void __iomem *)0xf0000000;
>  }
>  
> -static DEFINE_SPINLOCK(scu_lock);
> -static unsigned long tmp;
> -
>  #ifdef CONFIG_HAVE_ARM_TWD
>  static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, 0xf0000600, 29);
>  void __init sh73a0_register_twd(void)
> @@ -52,20 +49,6 @@ void __init sh73a0_register_twd(void)
>  }
>  #endif
>  
> -static void modify_scu_cpu_psr(unsigned long set, unsigned long clr)
> -{
> -	void __iomem *scu_base = scu_base_addr();
> -
> -	spin_lock(&scu_lock);
> -	tmp = __raw_readl(scu_base + 8);
> -	tmp &= ~clr;
> -	tmp |= set;
> -	spin_unlock(&scu_lock);
> -
> -	/* disable cache coherency after releasing the lock */
> -	__raw_writel(tmp, scu_base + 8);

None of this locking was needed as the power status register is byte
accessible.

> -}
> -
>  static unsigned int __init sh73a0_get_core_count(void)
>  {
>  	void __iomem *scu_base = scu_base_addr();
> @@ -82,9 +65,6 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
>  {
>  	cpu = cpu_logical_map(cpu);
>  
> -	/* enable cache coherency */
> -	modify_scu_cpu_psr(0, 3 << (cpu * 8));
> -

So simply changing this to scu_power_mode call would accomplish the same
thing and avoid all the assembly.

Rob

>  	if (((__raw_readl(PSTR) >> (4 * cpu)) & 3) == 3)
>  		__raw_writel(1 << cpu, WUPCR);	/* wake up */
>  	else
> @@ -95,16 +75,14 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
>  
>  static void __init sh73a0_smp_prepare_cpus(unsigned int max_cpus)
>  {
> -	int cpu = cpu_logical_map(0);
> -
>  	scu_enable(scu_base_addr());
>  
> -	/* Map the reset vector (in headsmp.S) */
> +	/* Map the reset vector (in headsmp-sh73a0.S) */
>  	__raw_writel(0, APARMBAREA);      /* 4k */
> -	__raw_writel(__pa(shmobile_secondary_vector), SBAR);
> +	__raw_writel(__pa(sh73a0_secondary_vector), SBAR);
>  
> -	/* enable cache coherency on CPU0 */
> -	modify_scu_cpu_psr(0, 3 << (cpu * 8));
> +	/* enable cache coherency on booting CPU */
> +	scu_power_mode(scu_base_addr(), SCU_PM_NORMAL);
>  }
>  
>  static void __init sh73a0_smp_init_cpus(void)
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
  2012-12-06 12:08   ` Bastian Hecht
@ 2012-12-14 14:17     ` Rob Herring
  -1 siblings, 0 replies; 22+ messages in thread
From: Rob Herring @ 2012-12-14 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/06/2012 06:08 AM, Bastian Hecht wrote:
> From: Bastian Hecht <hechtb@gmail.com>
> 
> Add the capability to add and remove CPUs on the fly.
> The Cortex-A9 offers the possibility to take single cores out of the
> MP Core. We add this capabilty taking care that caches are kept
> coherent. For the actual shutdown via a WFI instruction, a code snippet
> from the omap2 code tree is copied. Thanks for that! For verifying the
> shutdown we rely on the internal SH73A0 Power Status Register
> PSTR.
> 
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> ---
>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>  3 files changed, 82 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> index bec4c0d..be463a3 100644
> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> @@ -23,6 +23,52 @@
>  #include <linux/init.h>
>  #include <asm/memory.h>
>  
> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
> +ENTRY(sh73a0_do_wfi)
> +        stmfd   sp!, {lr}

Why does the lr need to be pushed to the stack?

> +
> +        /*
> +         * Execute an ISB instruction to ensure that all of the
> +         * CP15 register changes have been committed.
> +         */
> +        isb

Generally writes to cp15 registers that need an isb already do so.

> +
> +        /*
> +         * Execute a barrier instruction to ensure that all cache,
> +         * TLB and branch predictor maintenance operations issued
> +         * by any CPU in the cluster have completed.
> +         */
> +        dsb
> +        dmb

A dsb is a superset of a dmb, so you should not need both.

> +
> +        /*
> +         * Execute a WFI instruction and wait until the
> +         * STANDBYWFI output is asserted to indicate that the
> +         * CPU is in idle and low power state. CPU can specualatively
> +         * prefetch the instructions so add NOPs after WFI. Sixteen
> +         * NOPs as per Cortex-A9 pipeline.

Why do you care what is prefetched? You're never coming back here, right?

Rob

> +         */
> +        wfi                                     @ Wait For Interrupt
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +
> +        ldmfd   sp!, {pc}
> +ENDPROC(sh73a0_do_wfi)
> +
>  	__CPUINIT
>  /*
>   * Reset vector for secondary CPUs.
> diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
> index f2e2c29..40f767e 100644
> --- a/arch/arm/mach-shmobile/include/mach/common.h
> +++ b/arch/arm/mach-shmobile/include/mach/common.h
> @@ -55,6 +55,7 @@ extern void sh73a0_add_standard_devices(void);
>  extern void sh73a0_clock_init(void);
>  extern void sh73a0_pinmux_init(void);
>  extern void sh73a0_secondary_vector(void);
> +extern void sh73a0_do_wfi(void);
>  extern struct clk sh73a0_extal1_clk;
>  extern struct clk sh73a0_extal2_clk;
>  extern struct clk sh73a0_extcki_clk;
> diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
> index 5e36f5d..9237e13 100644
> --- a/arch/arm/mach-shmobile/smp-sh73a0.c
> +++ b/arch/arm/mach-shmobile/smp-sh73a0.c
> @@ -24,6 +24,7 @@
>  #include <linux/io.h>
>  #include <linux/delay.h>
>  #include <mach/common.h>
> +#include <asm/cacheflush.h>
>  #include <asm/smp_plat.h>
>  #include <mach/sh73a0.h>
>  #include <asm/smp_scu.h>
> @@ -36,6 +37,8 @@
>  #define SBAR		IOMEM(0xe6180020)
>  #define APARMBAREA	IOMEM(0xe6f10020)
>  
> +#define PSTR_SHUTDOWN_MODE	3
> +
>  static void __iomem *scu_base_addr(void)
>  {
>  	return (void __iomem *)0xf0000000;
> @@ -92,16 +95,20 @@ static void __init sh73a0_smp_init_cpus(void)
>  	shmobile_smp_init_cpus(ncores);
>  }
>  
> -static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
> +#ifdef CONFIG_HOTPLUG_CPU
> +static int sh73a0_cpu_kill(unsigned int cpu)
>  {
> +
>  	int k;
> +	u32 pstr;
>  
> -	/* this function is running on another CPU than the offline target,
> -	 * here we need wait for shutdown code in platform_cpu_die() to
> -	 * finish before asking SoC-specific code to power off the CPU core.
> +	/*
> +	 * wait until the power status register confirms the shutdown of the
> +	 * offline target
>  	 */
>  	for (k = 0; k < 1000; k++) {
> -		if (shmobile_cpu_is_dead(cpu))
> +		pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
> +		if (pstr = PSTR_SHUTDOWN_MODE)
>  			return 1;
>  
>  		mdelay(1);
> @@ -110,6 +117,28 @@ static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
>  	return 0;
>  }
>  
> +static void sh73a0_cpu_die(unsigned int cpu)
> +{
> +	/*
> +	 * The ARM MPcore does not issue a cache coherency request for the L1
> +	 * cache when powering off single CPUs. We must take care of this and
> +	 * further caches.
> +	 */
> +	dsb();
> +	flush_cache_all();
> +
> +	/* Set power off mode. This takes the CPU out of the MP cluster */
> +	scu_power_mode(scu_base_addr(), SCU_PM_POWEROFF);
> +
> +	/* Enter shutdown mode */
> +	sh73a0_do_wfi();
> +
> +	/* We assume success always. We never reach this */
> +	pr_err("Shutting down CPU failed. This should never happen!\n");
> +	for (;;)
> +		;
> +}
> +#endif /* CONFIG_HOTPLUG_CPU */
>  
>  struct smp_operations sh73a0_smp_ops __initdata = {
>  	.smp_init_cpus		= sh73a0_smp_init_cpus,
> @@ -118,7 +147,7 @@ struct smp_operations sh73a0_smp_ops __initdata = {
>  	.smp_boot_secondary	= sh73a0_boot_secondary,
>  #ifdef CONFIG_HOTPLUG_CPU
>  	.cpu_kill		= sh73a0_cpu_kill,
> -	.cpu_die		= shmobile_cpu_die,
> +	.cpu_die		= sh73a0_cpu_die,
>  	.cpu_disable		= shmobile_cpu_disable,
>  #endif
>  };
> 


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
@ 2012-12-14 14:17     ` Rob Herring
  0 siblings, 0 replies; 22+ messages in thread
From: Rob Herring @ 2012-12-14 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/06/2012 06:08 AM, Bastian Hecht wrote:
> From: Bastian Hecht <hechtb@gmail.com>
> 
> Add the capability to add and remove CPUs on the fly.
> The Cortex-A9 offers the possibility to take single cores out of the
> MP Core. We add this capabilty taking care that caches are kept
> coherent. For the actual shutdown via a WFI instruction, a code snippet
> from the omap2 code tree is copied. Thanks for that! For verifying the
> shutdown we rely on the internal SH73A0 Power Status Register
> PSTR.
> 
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> ---
>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>  3 files changed, 82 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> index bec4c0d..be463a3 100644
> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
> @@ -23,6 +23,52 @@
>  #include <linux/init.h>
>  #include <asm/memory.h>
>  
> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
> +ENTRY(sh73a0_do_wfi)
> +        stmfd   sp!, {lr}

Why does the lr need to be pushed to the stack?

> +
> +        /*
> +         * Execute an ISB instruction to ensure that all of the
> +         * CP15 register changes have been committed.
> +         */
> +        isb

Generally writes to cp15 registers that need an isb already do so.

> +
> +        /*
> +         * Execute a barrier instruction to ensure that all cache,
> +         * TLB and branch predictor maintenance operations issued
> +         * by any CPU in the cluster have completed.
> +         */
> +        dsb
> +        dmb

A dsb is a superset of a dmb, so you should not need both.

> +
> +        /*
> +         * Execute a WFI instruction and wait until the
> +         * STANDBYWFI output is asserted to indicate that the
> +         * CPU is in idle and low power state. CPU can specualatively
> +         * prefetch the instructions so add NOPs after WFI. Sixteen
> +         * NOPs as per Cortex-A9 pipeline.

Why do you care what is prefetched? You're never coming back here, right?

Rob

> +         */
> +        wfi                                     @ Wait For Interrupt
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +        nop
> +
> +        ldmfd   sp!, {pc}
> +ENDPROC(sh73a0_do_wfi)
> +
>  	__CPUINIT
>  /*
>   * Reset vector for secondary CPUs.
> diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
> index f2e2c29..40f767e 100644
> --- a/arch/arm/mach-shmobile/include/mach/common.h
> +++ b/arch/arm/mach-shmobile/include/mach/common.h
> @@ -55,6 +55,7 @@ extern void sh73a0_add_standard_devices(void);
>  extern void sh73a0_clock_init(void);
>  extern void sh73a0_pinmux_init(void);
>  extern void sh73a0_secondary_vector(void);
> +extern void sh73a0_do_wfi(void);
>  extern struct clk sh73a0_extal1_clk;
>  extern struct clk sh73a0_extal2_clk;
>  extern struct clk sh73a0_extcki_clk;
> diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
> index 5e36f5d..9237e13 100644
> --- a/arch/arm/mach-shmobile/smp-sh73a0.c
> +++ b/arch/arm/mach-shmobile/smp-sh73a0.c
> @@ -24,6 +24,7 @@
>  #include <linux/io.h>
>  #include <linux/delay.h>
>  #include <mach/common.h>
> +#include <asm/cacheflush.h>
>  #include <asm/smp_plat.h>
>  #include <mach/sh73a0.h>
>  #include <asm/smp_scu.h>
> @@ -36,6 +37,8 @@
>  #define SBAR		IOMEM(0xe6180020)
>  #define APARMBAREA	IOMEM(0xe6f10020)
>  
> +#define PSTR_SHUTDOWN_MODE	3
> +
>  static void __iomem *scu_base_addr(void)
>  {
>  	return (void __iomem *)0xf0000000;
> @@ -92,16 +95,20 @@ static void __init sh73a0_smp_init_cpus(void)
>  	shmobile_smp_init_cpus(ncores);
>  }
>  
> -static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
> +#ifdef CONFIG_HOTPLUG_CPU
> +static int sh73a0_cpu_kill(unsigned int cpu)
>  {
> +
>  	int k;
> +	u32 pstr;
>  
> -	/* this function is running on another CPU than the offline target,
> -	 * here we need wait for shutdown code in platform_cpu_die() to
> -	 * finish before asking SoC-specific code to power off the CPU core.
> +	/*
> +	 * wait until the power status register confirms the shutdown of the
> +	 * offline target
>  	 */
>  	for (k = 0; k < 1000; k++) {
> -		if (shmobile_cpu_is_dead(cpu))
> +		pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
> +		if (pstr == PSTR_SHUTDOWN_MODE)
>  			return 1;
>  
>  		mdelay(1);
> @@ -110,6 +117,28 @@ static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
>  	return 0;
>  }
>  
> +static void sh73a0_cpu_die(unsigned int cpu)
> +{
> +	/*
> +	 * The ARM MPcore does not issue a cache coherency request for the L1
> +	 * cache when powering off single CPUs. We must take care of this and
> +	 * further caches.
> +	 */
> +	dsb();
> +	flush_cache_all();
> +
> +	/* Set power off mode. This takes the CPU out of the MP cluster */
> +	scu_power_mode(scu_base_addr(), SCU_PM_POWEROFF);
> +
> +	/* Enter shutdown mode */
> +	sh73a0_do_wfi();
> +
> +	/* We assume success always. We never reach this */
> +	pr_err("Shutting down CPU failed. This should never happen!\n");
> +	for (;;)
> +		;
> +}
> +#endif /* CONFIG_HOTPLUG_CPU */
>  
>  struct smp_operations sh73a0_smp_ops __initdata = {
>  	.smp_init_cpus		= sh73a0_smp_init_cpus,
> @@ -118,7 +147,7 @@ struct smp_operations sh73a0_smp_ops __initdata = {
>  	.smp_boot_secondary	= sh73a0_boot_secondary,
>  #ifdef CONFIG_HOTPLUG_CPU
>  	.cpu_kill		= sh73a0_cpu_kill,
> -	.cpu_die		= shmobile_cpu_die,
> +	.cpu_die		= sh73a0_cpu_die,
>  	.cpu_disable		= shmobile_cpu_disable,
>  #endif
>  };
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
  2012-12-14 14:06   ` Rob Herring
@ 2012-12-14 15:07     ` Bastian Hecht
  -1 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-14 15:07 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rob,

thanks for commenting on this.

>>
>> -static void modify_scu_cpu_psr(unsigned long set, unsigned long clr)
>> -{
>> -     void __iomem *scu_base = scu_base_addr();
>> -
>> -     spin_lock(&scu_lock);
>> -     tmp = __raw_readl(scu_base + 8);
>> -     tmp &= ~clr;
>> -     tmp |= set;
>> -     spin_unlock(&scu_lock);
>> -
>> -     /* disable cache coherency after releasing the lock */
>> -     __raw_writel(tmp, scu_base + 8);
>
> None of this locking was needed as the power status register is byte
> accessible.

Even if we switch to byte access here we would need protection, as the
SCU power register gets touched from different CPUs.

>> -}
>> -
>>  static unsigned int __init sh73a0_get_core_count(void)
>>  {
>>       void __iomem *scu_base = scu_base_addr();
>> @@ -82,9 +65,6 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
>>  {
>>       cpu = cpu_logical_map(cpu);
>>
>> -     /* enable cache coherency */
>> -     modify_scu_cpu_psr(0, 3 << (cpu * 8));
>> -
>
> So simply changing this to scu_power_mode call would accomplish the same
> thing and avoid all the assembly.

Yes that was exactly my fail try before. See
http://www.spinics.net/lists/linux-sh/msg13685.html
It broke the bringing up of secondary CPUs completely as
sh73a0_boot_secondary() is running on CPU0 when booting CPU1. So
scu_power_mode() changed the registers of the wrong CPU.

cheers,

 Bastian

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags
@ 2012-12-14 15:07     ` Bastian Hecht
  0 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-14 15:07 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rob,

thanks for commenting on this.

>>
>> -static void modify_scu_cpu_psr(unsigned long set, unsigned long clr)
>> -{
>> -     void __iomem *scu_base = scu_base_addr();
>> -
>> -     spin_lock(&scu_lock);
>> -     tmp = __raw_readl(scu_base + 8);
>> -     tmp &= ~clr;
>> -     tmp |= set;
>> -     spin_unlock(&scu_lock);
>> -
>> -     /* disable cache coherency after releasing the lock */
>> -     __raw_writel(tmp, scu_base + 8);
>
> None of this locking was needed as the power status register is byte
> accessible.

Even if we switch to byte access here we would need protection, as the
SCU power register gets touched from different CPUs.

>> -}
>> -
>>  static unsigned int __init sh73a0_get_core_count(void)
>>  {
>>       void __iomem *scu_base = scu_base_addr();
>> @@ -82,9 +65,6 @@ static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct
>>  {
>>       cpu = cpu_logical_map(cpu);
>>
>> -     /* enable cache coherency */
>> -     modify_scu_cpu_psr(0, 3 << (cpu * 8));
>> -
>
> So simply changing this to scu_power_mode call would accomplish the same
> thing and avoid all the assembly.

Yes that was exactly my fail try before. See
http://www.spinics.net/lists/linux-sh/msg13685.html
It broke the bringing up of secondary CPUs completely as
sh73a0_boot_secondary() is running on CPU0 when booting CPU1. So
scu_power_mode() changed the registers of the wrong CPU.

cheers,

 Bastian

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
  2012-12-14 14:17     ` Rob Herring
@ 2012-12-14 15:33       ` Bastian Hecht
  -1 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-14 15:33 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

2012/12/14 Rob Herring <robherring2@gmail.com>:
> On 12/06/2012 06:08 AM, Bastian Hecht wrote:
>> From: Bastian Hecht <hechtb@gmail.com>
>>
>> Add the capability to add and remove CPUs on the fly.
>> The Cortex-A9 offers the possibility to take single cores out of the
>> MP Core. We add this capabilty taking care that caches are kept
>> coherent. For the actual shutdown via a WFI instruction, a code snippet
>> from the omap2 code tree is copied. Thanks for that! For verifying the
>> shutdown we rely on the internal SH73A0 Power Status Register
>> PSTR.
>>
>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>> ---
>>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>>  3 files changed, 82 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>> index bec4c0d..be463a3 100644
>> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
>> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>> @@ -23,6 +23,52 @@
>>  #include <linux/init.h>
>>  #include <asm/memory.h>
>>
>> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
>> +ENTRY(sh73a0_do_wfi)
>> +        stmfd   sp!, {lr}
>
> Why does the lr need to be pushed to the stack?

Yes I must admit this is paradox - we never return but prepare to do
so... In the OMAP code they've got a lead out code in case the WFI
doesn't succeed. I see no reason how this could ever happen here but
to take a safe route I've decided to keep the mechanism to be able to
return and spit out an error message back in the C code.

>> +
>> +        /*
>> +         * Execute an ISB instruction to ensure that all of the
>> +         * CP15 register changes have been committed.
>> +         */
>> +        isb
>
> Generally writes to cp15 registers that need an isb already do so.

Ok nice, I'll check that and throw it out.

>> +
>> +        /*
>> +         * Execute a barrier instruction to ensure that all cache,
>> +         * TLB and branch predictor maintenance operations issued
>> +         * by any CPU in the cluster have completed.
>> +         */
>> +        dsb
>> +        dmb
>
> A dsb is a superset of a dmb, so you should not need both.

Same here.

>> +
>> +        /*
>> +         * Execute a WFI instruction and wait until the
>> +         * STANDBYWFI output is asserted to indicate that the
>> +         * CPU is in idle and low power state. CPU can specualatively
>> +         * prefetch the instructions so add NOPs after WFI. Sixteen
>> +         * NOPs as per Cortex-A9 pipeline.
>
> Why do you care what is prefetched? You're never coming back here, right?

We can jump back to the paradox top. The idea seems to be to have a
clean pipeline in case the WFI doesn't succeed.
The thing about this whole code snippet is: I saw no reason to
reinvent the wheel and tinker on my own solution when there is code
that does the job in a clean way. I thought this could maybe be moved
to a general ARM code base when more people rely on it.

cheers,

 Bastian


> Rob
>
>> +         */
>> +        wfi                                     @ Wait For Interrupt
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +
>> +        ldmfd   sp!, {pc}
>> +ENDPROC(sh73a0_do_wfi)
>> +
>>       __CPUINIT
>>  /*
>>   * Reset vector for secondary CPUs.
>> diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
>> index f2e2c29..40f767e 100644
>> --- a/arch/arm/mach-shmobile/include/mach/common.h
>> +++ b/arch/arm/mach-shmobile/include/mach/common.h
>> @@ -55,6 +55,7 @@ extern void sh73a0_add_standard_devices(void);
>>  extern void sh73a0_clock_init(void);
>>  extern void sh73a0_pinmux_init(void);
>>  extern void sh73a0_secondary_vector(void);
>> +extern void sh73a0_do_wfi(void);
>>  extern struct clk sh73a0_extal1_clk;
>>  extern struct clk sh73a0_extal2_clk;
>>  extern struct clk sh73a0_extcki_clk;
>> diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
>> index 5e36f5d..9237e13 100644
>> --- a/arch/arm/mach-shmobile/smp-sh73a0.c
>> +++ b/arch/arm/mach-shmobile/smp-sh73a0.c
>> @@ -24,6 +24,7 @@
>>  #include <linux/io.h>
>>  #include <linux/delay.h>
>>  #include <mach/common.h>
>> +#include <asm/cacheflush.h>
>>  #include <asm/smp_plat.h>
>>  #include <mach/sh73a0.h>
>>  #include <asm/smp_scu.h>
>> @@ -36,6 +37,8 @@
>>  #define SBAR         IOMEM(0xe6180020)
>>  #define APARMBAREA   IOMEM(0xe6f10020)
>>
>> +#define PSTR_SHUTDOWN_MODE   3
>> +
>>  static void __iomem *scu_base_addr(void)
>>  {
>>       return (void __iomem *)0xf0000000;
>> @@ -92,16 +95,20 @@ static void __init sh73a0_smp_init_cpus(void)
>>       shmobile_smp_init_cpus(ncores);
>>  }
>>
>> -static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
>> +#ifdef CONFIG_HOTPLUG_CPU
>> +static int sh73a0_cpu_kill(unsigned int cpu)
>>  {
>> +
>>       int k;
>> +     u32 pstr;
>>
>> -     /* this function is running on another CPU than the offline target,
>> -      * here we need wait for shutdown code in platform_cpu_die() to
>> -      * finish before asking SoC-specific code to power off the CPU core.
>> +     /*
>> +      * wait until the power status register confirms the shutdown of the
>> +      * offline target
>>        */
>>       for (k = 0; k < 1000; k++) {
>> -             if (shmobile_cpu_is_dead(cpu))
>> +             pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
>> +             if (pstr = PSTR_SHUTDOWN_MODE)
>>                       return 1;
>>
>>               mdelay(1);
>> @@ -110,6 +117,28 @@ static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
>>       return 0;
>>  }
>>
>> +static void sh73a0_cpu_die(unsigned int cpu)
>> +{
>> +     /*
>> +      * The ARM MPcore does not issue a cache coherency request for the L1
>> +      * cache when powering off single CPUs. We must take care of this and
>> +      * further caches.
>> +      */
>> +     dsb();
>> +     flush_cache_all();
>> +
>> +     /* Set power off mode. This takes the CPU out of the MP cluster */
>> +     scu_power_mode(scu_base_addr(), SCU_PM_POWEROFF);
>> +
>> +     /* Enter shutdown mode */
>> +     sh73a0_do_wfi();
>> +
>> +     /* We assume success always. We never reach this */
>> +     pr_err("Shutting down CPU failed. This should never happen!\n");
>> +     for (;;)
>> +             ;
>> +}
>> +#endif /* CONFIG_HOTPLUG_CPU */
>>
>>  struct smp_operations sh73a0_smp_ops __initdata = {
>>       .smp_init_cpus          = sh73a0_smp_init_cpus,
>> @@ -118,7 +147,7 @@ struct smp_operations sh73a0_smp_ops __initdata = {
>>       .smp_boot_secondary     = sh73a0_boot_secondary,
>>  #ifdef CONFIG_HOTPLUG_CPU
>>       .cpu_kill               = sh73a0_cpu_kill,
>> -     .cpu_die                = shmobile_cpu_die,
>> +     .cpu_die                = sh73a0_cpu_die,
>>       .cpu_disable            = shmobile_cpu_disable,
>>  #endif
>>  };
>>
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
@ 2012-12-14 15:33       ` Bastian Hecht
  0 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-14 15:33 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

2012/12/14 Rob Herring <robherring2@gmail.com>:
> On 12/06/2012 06:08 AM, Bastian Hecht wrote:
>> From: Bastian Hecht <hechtb@gmail.com>
>>
>> Add the capability to add and remove CPUs on the fly.
>> The Cortex-A9 offers the possibility to take single cores out of the
>> MP Core. We add this capabilty taking care that caches are kept
>> coherent. For the actual shutdown via a WFI instruction, a code snippet
>> from the omap2 code tree is copied. Thanks for that! For verifying the
>> shutdown we rely on the internal SH73A0 Power Status Register
>> PSTR.
>>
>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>> ---
>>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>>  3 files changed, 82 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>> index bec4c0d..be463a3 100644
>> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
>> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>> @@ -23,6 +23,52 @@
>>  #include <linux/init.h>
>>  #include <asm/memory.h>
>>
>> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
>> +ENTRY(sh73a0_do_wfi)
>> +        stmfd   sp!, {lr}
>
> Why does the lr need to be pushed to the stack?

Yes I must admit this is paradox - we never return but prepare to do
so... In the OMAP code they've got a lead out code in case the WFI
doesn't succeed. I see no reason how this could ever happen here but
to take a safe route I've decided to keep the mechanism to be able to
return and spit out an error message back in the C code.

>> +
>> +        /*
>> +         * Execute an ISB instruction to ensure that all of the
>> +         * CP15 register changes have been committed.
>> +         */
>> +        isb
>
> Generally writes to cp15 registers that need an isb already do so.

Ok nice, I'll check that and throw it out.

>> +
>> +        /*
>> +         * Execute a barrier instruction to ensure that all cache,
>> +         * TLB and branch predictor maintenance operations issued
>> +         * by any CPU in the cluster have completed.
>> +         */
>> +        dsb
>> +        dmb
>
> A dsb is a superset of a dmb, so you should not need both.

Same here.

>> +
>> +        /*
>> +         * Execute a WFI instruction and wait until the
>> +         * STANDBYWFI output is asserted to indicate that the
>> +         * CPU is in idle and low power state. CPU can specualatively
>> +         * prefetch the instructions so add NOPs after WFI. Sixteen
>> +         * NOPs as per Cortex-A9 pipeline.
>
> Why do you care what is prefetched? You're never coming back here, right?

We can jump back to the paradox top. The idea seems to be to have a
clean pipeline in case the WFI doesn't succeed.
The thing about this whole code snippet is: I saw no reason to
reinvent the wheel and tinker on my own solution when there is code
that does the job in a clean way. I thought this could maybe be moved
to a general ARM code base when more people rely on it.

cheers,

 Bastian


> Rob
>
>> +         */
>> +        wfi                                     @ Wait For Interrupt
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +        nop
>> +
>> +        ldmfd   sp!, {pc}
>> +ENDPROC(sh73a0_do_wfi)
>> +
>>       __CPUINIT
>>  /*
>>   * Reset vector for secondary CPUs.
>> diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
>> index f2e2c29..40f767e 100644
>> --- a/arch/arm/mach-shmobile/include/mach/common.h
>> +++ b/arch/arm/mach-shmobile/include/mach/common.h
>> @@ -55,6 +55,7 @@ extern void sh73a0_add_standard_devices(void);
>>  extern void sh73a0_clock_init(void);
>>  extern void sh73a0_pinmux_init(void);
>>  extern void sh73a0_secondary_vector(void);
>> +extern void sh73a0_do_wfi(void);
>>  extern struct clk sh73a0_extal1_clk;
>>  extern struct clk sh73a0_extal2_clk;
>>  extern struct clk sh73a0_extcki_clk;
>> diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
>> index 5e36f5d..9237e13 100644
>> --- a/arch/arm/mach-shmobile/smp-sh73a0.c
>> +++ b/arch/arm/mach-shmobile/smp-sh73a0.c
>> @@ -24,6 +24,7 @@
>>  #include <linux/io.h>
>>  #include <linux/delay.h>
>>  #include <mach/common.h>
>> +#include <asm/cacheflush.h>
>>  #include <asm/smp_plat.h>
>>  #include <mach/sh73a0.h>
>>  #include <asm/smp_scu.h>
>> @@ -36,6 +37,8 @@
>>  #define SBAR         IOMEM(0xe6180020)
>>  #define APARMBAREA   IOMEM(0xe6f10020)
>>
>> +#define PSTR_SHUTDOWN_MODE   3
>> +
>>  static void __iomem *scu_base_addr(void)
>>  {
>>       return (void __iomem *)0xf0000000;
>> @@ -92,16 +95,20 @@ static void __init sh73a0_smp_init_cpus(void)
>>       shmobile_smp_init_cpus(ncores);
>>  }
>>
>> -static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
>> +#ifdef CONFIG_HOTPLUG_CPU
>> +static int sh73a0_cpu_kill(unsigned int cpu)
>>  {
>> +
>>       int k;
>> +     u32 pstr;
>>
>> -     /* this function is running on another CPU than the offline target,
>> -      * here we need wait for shutdown code in platform_cpu_die() to
>> -      * finish before asking SoC-specific code to power off the CPU core.
>> +     /*
>> +      * wait until the power status register confirms the shutdown of the
>> +      * offline target
>>        */
>>       for (k = 0; k < 1000; k++) {
>> -             if (shmobile_cpu_is_dead(cpu))
>> +             pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
>> +             if (pstr == PSTR_SHUTDOWN_MODE)
>>                       return 1;
>>
>>               mdelay(1);
>> @@ -110,6 +117,28 @@ static int __maybe_unused sh73a0_cpu_kill(unsigned int cpu)
>>       return 0;
>>  }
>>
>> +static void sh73a0_cpu_die(unsigned int cpu)
>> +{
>> +     /*
>> +      * The ARM MPcore does not issue a cache coherency request for the L1
>> +      * cache when powering off single CPUs. We must take care of this and
>> +      * further caches.
>> +      */
>> +     dsb();
>> +     flush_cache_all();
>> +
>> +     /* Set power off mode. This takes the CPU out of the MP cluster */
>> +     scu_power_mode(scu_base_addr(), SCU_PM_POWEROFF);
>> +
>> +     /* Enter shutdown mode */
>> +     sh73a0_do_wfi();
>> +
>> +     /* We assume success always. We never reach this */
>> +     pr_err("Shutting down CPU failed. This should never happen!\n");
>> +     for (;;)
>> +             ;
>> +}
>> +#endif /* CONFIG_HOTPLUG_CPU */
>>
>>  struct smp_operations sh73a0_smp_ops __initdata = {
>>       .smp_init_cpus          = sh73a0_smp_init_cpus,
>> @@ -118,7 +147,7 @@ struct smp_operations sh73a0_smp_ops __initdata = {
>>       .smp_boot_secondary     = sh73a0_boot_secondary,
>>  #ifdef CONFIG_HOTPLUG_CPU
>>       .cpu_kill               = sh73a0_cpu_kill,
>> -     .cpu_die                = shmobile_cpu_die,
>> +     .cpu_die                = sh73a0_cpu_die,
>>       .cpu_disable            = shmobile_cpu_disable,
>>  #endif
>>  };
>>
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
  2012-12-14 15:33       ` Bastian Hecht
@ 2012-12-14 18:08         ` Rob Herring
  -1 siblings, 0 replies; 22+ messages in thread
From: Rob Herring @ 2012-12-14 18:08 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/14/2012 09:33 AM, Bastian Hecht wrote:
> Hi,
> 
> 2012/12/14 Rob Herring <robherring2@gmail.com>:
>> On 12/06/2012 06:08 AM, Bastian Hecht wrote:
>>> From: Bastian Hecht <hechtb@gmail.com>
>>>
>>> Add the capability to add and remove CPUs on the fly.
>>> The Cortex-A9 offers the possibility to take single cores out of the
>>> MP Core. We add this capabilty taking care that caches are kept
>>> coherent. For the actual shutdown via a WFI instruction, a code snippet
>>> from the omap2 code tree is copied. Thanks for that! For verifying the
>>> shutdown we rely on the internal SH73A0 Power Status Register
>>> PSTR.
>>>
>>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>>> ---
>>>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>>>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>>>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>>>  3 files changed, 82 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>> index bec4c0d..be463a3 100644
>>> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>> @@ -23,6 +23,52 @@
>>>  #include <linux/init.h>
>>>  #include <asm/memory.h>
>>>
>>> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
>>> +ENTRY(sh73a0_do_wfi)
>>> +        stmfd   sp!, {lr}
>>
>> Why does the lr need to be pushed to the stack?
> 
> Yes I must admit this is paradox - we never return but prepare to do
> so... In the OMAP code they've got a lead out code in case the WFI
> doesn't succeed. I see no reason how this could ever happen here but
> to take a safe route I've decided to keep the mechanism to be able to
> return and spit out an error message back in the C code.

It's not clear to me that OMAP needed this either. The lr value would
have to get lost during wfi.

>>> +
>>> +        /*
>>> +         * Execute an ISB instruction to ensure that all of the
>>> +         * CP15 register changes have been committed.
>>> +         */
>>> +        isb
>>
>> Generally writes to cp15 registers that need an isb already do so.
> 
> Ok nice, I'll check that and throw it out.
> 
>>> +
>>> +        /*
>>> +         * Execute a barrier instruction to ensure that all cache,
>>> +         * TLB and branch predictor maintenance operations issued
>>> +         * by any CPU in the cluster have completed.
>>> +         */
>>> +        dsb
>>> +        dmb
>>
>> A dsb is a superset of a dmb, so you should not need both.
> 
> Same here.
> 
>>> +
>>> +        /*
>>> +         * Execute a WFI instruction and wait until the
>>> +         * STANDBYWFI output is asserted to indicate that the
>>> +         * CPU is in idle and low power state. CPU can specualatively
>>> +         * prefetch the instructions so add NOPs after WFI. Sixteen
>>> +         * NOPs as per Cortex-A9 pipeline.
>>
>> Why do you care what is prefetched? You're never coming back here, right?
> 
> We can jump back to the paradox top. The idea seems to be to have a
> clean pipeline in case the WFI doesn't succeed.
> The thing about this whole code snippet is: I saw no reason to
> reinvent the wheel and tinker on my own solution when there is code
> that does the job in a clean way. I thought this could maybe be moved
> to a general ARM code base when more people rely on it.
> 

Blindly copying code is reinventing the wheel. You are making it appear
that this is needed, but don't seem to have any reason why other than
OMAP did it. If it is in fact needed, then it should be common.

Use cpu_do_idle or figure out and explain why you can't. It's important
to know that if we do go and consolidate this code later.

Rob


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
@ 2012-12-14 18:08         ` Rob Herring
  0 siblings, 0 replies; 22+ messages in thread
From: Rob Herring @ 2012-12-14 18:08 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/14/2012 09:33 AM, Bastian Hecht wrote:
> Hi,
> 
> 2012/12/14 Rob Herring <robherring2@gmail.com>:
>> On 12/06/2012 06:08 AM, Bastian Hecht wrote:
>>> From: Bastian Hecht <hechtb@gmail.com>
>>>
>>> Add the capability to add and remove CPUs on the fly.
>>> The Cortex-A9 offers the possibility to take single cores out of the
>>> MP Core. We add this capabilty taking care that caches are kept
>>> coherent. For the actual shutdown via a WFI instruction, a code snippet
>>> from the omap2 code tree is copied. Thanks for that! For verifying the
>>> shutdown we rely on the internal SH73A0 Power Status Register
>>> PSTR.
>>>
>>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>>> ---
>>>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>>>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>>>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>>>  3 files changed, 82 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>> index bec4c0d..be463a3 100644
>>> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>> @@ -23,6 +23,52 @@
>>>  #include <linux/init.h>
>>>  #include <asm/memory.h>
>>>
>>> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
>>> +ENTRY(sh73a0_do_wfi)
>>> +        stmfd   sp!, {lr}
>>
>> Why does the lr need to be pushed to the stack?
> 
> Yes I must admit this is paradox - we never return but prepare to do
> so... In the OMAP code they've got a lead out code in case the WFI
> doesn't succeed. I see no reason how this could ever happen here but
> to take a safe route I've decided to keep the mechanism to be able to
> return and spit out an error message back in the C code.

It's not clear to me that OMAP needed this either. The lr value would
have to get lost during wfi.

>>> +
>>> +        /*
>>> +         * Execute an ISB instruction to ensure that all of the
>>> +         * CP15 register changes have been committed.
>>> +         */
>>> +        isb
>>
>> Generally writes to cp15 registers that need an isb already do so.
> 
> Ok nice, I'll check that and throw it out.
> 
>>> +
>>> +        /*
>>> +         * Execute a barrier instruction to ensure that all cache,
>>> +         * TLB and branch predictor maintenance operations issued
>>> +         * by any CPU in the cluster have completed.
>>> +         */
>>> +        dsb
>>> +        dmb
>>
>> A dsb is a superset of a dmb, so you should not need both.
> 
> Same here.
> 
>>> +
>>> +        /*
>>> +         * Execute a WFI instruction and wait until the
>>> +         * STANDBYWFI output is asserted to indicate that the
>>> +         * CPU is in idle and low power state. CPU can specualatively
>>> +         * prefetch the instructions so add NOPs after WFI. Sixteen
>>> +         * NOPs as per Cortex-A9 pipeline.
>>
>> Why do you care what is prefetched? You're never coming back here, right?
> 
> We can jump back to the paradox top. The idea seems to be to have a
> clean pipeline in case the WFI doesn't succeed.
> The thing about this whole code snippet is: I saw no reason to
> reinvent the wheel and tinker on my own solution when there is code
> that does the job in a clean way. I thought this could maybe be moved
> to a general ARM code base when more people rely on it.
> 

Blindly copying code is reinventing the wheel. You are making it appear
that this is needed, but don't seem to have any reason why other than
OMAP did it. If it is in fact needed, then it should be common.

Use cpu_do_idle or figure out and explain why you can't. It's important
to know that if we do go and consolidate this code later.

Rob

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
  2012-12-14 18:08         ` Rob Herring
@ 2012-12-17 11:01           ` Bastian Hecht
  -1 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-17 11:01 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rob,

2012/12/14 Rob Herring <robherring2@gmail.com>:
> On 12/14/2012 09:33 AM, Bastian Hecht wrote:
>> Hi,
>>
>> 2012/12/14 Rob Herring <robherring2@gmail.com>:
>>> On 12/06/2012 06:08 AM, Bastian Hecht wrote:
>>>> From: Bastian Hecht <hechtb@gmail.com>
>>>>
>>>> Add the capability to add and remove CPUs on the fly.
>>>> The Cortex-A9 offers the possibility to take single cores out of the
>>>> MP Core. We add this capabilty taking care that caches are kept
>>>> coherent. For the actual shutdown via a WFI instruction, a code snippet
>>>> from the omap2 code tree is copied. Thanks for that! For verifying the
>>>> shutdown we rely on the internal SH73A0 Power Status Register
>>>> PSTR.
>>>>
>>>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>>>> ---
>>>>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>>>>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>>>>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>>>>  3 files changed, 82 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>>> index bec4c0d..be463a3 100644
>>>> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>>> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>>> @@ -23,6 +23,52 @@
>>>>  #include <linux/init.h>
>>>>  #include <asm/memory.h>
>>>>
>>>> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
>>>> +ENTRY(sh73a0_do_wfi)
>>>> +        stmfd   sp!, {lr}
>>>
>>> Why does the lr need to be pushed to the stack?
>>
>> Yes I must admit this is paradox - we never return but prepare to do
>> so... In the OMAP code they've got a lead out code in case the WFI
>> doesn't succeed. I see no reason how this could ever happen here but
>> to take a safe route I've decided to keep the mechanism to be able to
>> return and spit out an error message back in the C code.
>
> It's not clear to me that OMAP needed this either. The lr value would
> have to get lost during wfi.
>
>>>> +
>>>> +        /*
>>>> +         * Execute an ISB instruction to ensure that all of the
>>>> +         * CP15 register changes have been committed.
>>>> +         */
>>>> +        isb
>>>
>>> Generally writes to cp15 registers that need an isb already do so.
>>
>> Ok nice, I'll check that and throw it out.
>>
>>>> +
>>>> +        /*
>>>> +         * Execute a barrier instruction to ensure that all cache,
>>>> +         * TLB and branch predictor maintenance operations issued
>>>> +         * by any CPU in the cluster have completed.
>>>> +         */
>>>> +        dsb
>>>> +        dmb
>>>
>>> A dsb is a superset of a dmb, so you should not need both.
>>
>> Same here.
>>
>>>> +
>>>> +        /*
>>>> +         * Execute a WFI instruction and wait until the
>>>> +         * STANDBYWFI output is asserted to indicate that the
>>>> +         * CPU is in idle and low power state. CPU can specualatively
>>>> +         * prefetch the instructions so add NOPs after WFI. Sixteen
>>>> +         * NOPs as per Cortex-A9 pipeline.
>>>
>>> Why do you care what is prefetched? You're never coming back here, right?
>>
>> We can jump back to the paradox top. The idea seems to be to have a
>> clean pipeline in case the WFI doesn't succeed.
>> The thing about this whole code snippet is: I saw no reason to
>> reinvent the wheel and tinker on my own solution when there is code
>> that does the job in a clean way. I thought this could maybe be moved
>> to a general ARM code base when more people rely on it.
>>
>
> Blindly copying code is reinventing the wheel. You are making it appear
> that this is needed, but don't seem to have any reason why other than
> OMAP did it. If it is in fact needed, then it should be common.
>
> Use cpu_do_idle or figure out and explain why you can't. It's important
> to know that if we do go and consolidate this code later.

Yes after thinking about it a bit, I must completely agree to you.
Copying OMAP's code was done because of hesitation and uncertainty
that I could miss something and brings the drawbacks that you
mentioned. It's much straighter to go with cpu_do_idle.

Thanks!

 Bastian


> Rob
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug
@ 2012-12-17 11:01           ` Bastian Hecht
  0 siblings, 0 replies; 22+ messages in thread
From: Bastian Hecht @ 2012-12-17 11:01 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rob,

2012/12/14 Rob Herring <robherring2@gmail.com>:
> On 12/14/2012 09:33 AM, Bastian Hecht wrote:
>> Hi,
>>
>> 2012/12/14 Rob Herring <robherring2@gmail.com>:
>>> On 12/06/2012 06:08 AM, Bastian Hecht wrote:
>>>> From: Bastian Hecht <hechtb@gmail.com>
>>>>
>>>> Add the capability to add and remove CPUs on the fly.
>>>> The Cortex-A9 offers the possibility to take single cores out of the
>>>> MP Core. We add this capabilty taking care that caches are kept
>>>> coherent. For the actual shutdown via a WFI instruction, a code snippet
>>>> from the omap2 code tree is copied. Thanks for that! For verifying the
>>>> shutdown we rely on the internal SH73A0 Power Status Register
>>>> PSTR.
>>>>
>>>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>>>> ---
>>>>  arch/arm/mach-shmobile/headsmp-sh73a0.S      |   46 ++++++++++++++++++++++++++
>>>>  arch/arm/mach-shmobile/include/mach/common.h |    1 +
>>>>  arch/arm/mach-shmobile/smp-sh73a0.c          |   41 +++++++++++++++++++----
>>>>  3 files changed, 82 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/arm/mach-shmobile/headsmp-sh73a0.S b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>>> index bec4c0d..be463a3 100644
>>>> --- a/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>>> +++ b/arch/arm/mach-shmobile/headsmp-sh73a0.S
>>>> @@ -23,6 +23,52 @@
>>>>  #include <linux/init.h>
>>>>  #include <asm/memory.h>
>>>>
>>>> +/* Taken from arch/arm/mach-omap2/sleep44xx.S. Thanks! */
>>>> +ENTRY(sh73a0_do_wfi)
>>>> +        stmfd   sp!, {lr}
>>>
>>> Why does the lr need to be pushed to the stack?
>>
>> Yes I must admit this is paradox - we never return but prepare to do
>> so... In the OMAP code they've got a lead out code in case the WFI
>> doesn't succeed. I see no reason how this could ever happen here but
>> to take a safe route I've decided to keep the mechanism to be able to
>> return and spit out an error message back in the C code.
>
> It's not clear to me that OMAP needed this either. The lr value would
> have to get lost during wfi.
>
>>>> +
>>>> +        /*
>>>> +         * Execute an ISB instruction to ensure that all of the
>>>> +         * CP15 register changes have been committed.
>>>> +         */
>>>> +        isb
>>>
>>> Generally writes to cp15 registers that need an isb already do so.
>>
>> Ok nice, I'll check that and throw it out.
>>
>>>> +
>>>> +        /*
>>>> +         * Execute a barrier instruction to ensure that all cache,
>>>> +         * TLB and branch predictor maintenance operations issued
>>>> +         * by any CPU in the cluster have completed.
>>>> +         */
>>>> +        dsb
>>>> +        dmb
>>>
>>> A dsb is a superset of a dmb, so you should not need both.
>>
>> Same here.
>>
>>>> +
>>>> +        /*
>>>> +         * Execute a WFI instruction and wait until the
>>>> +         * STANDBYWFI output is asserted to indicate that the
>>>> +         * CPU is in idle and low power state. CPU can specualatively
>>>> +         * prefetch the instructions so add NOPs after WFI. Sixteen
>>>> +         * NOPs as per Cortex-A9 pipeline.
>>>
>>> Why do you care what is prefetched? You're never coming back here, right?
>>
>> We can jump back to the paradox top. The idea seems to be to have a
>> clean pipeline in case the WFI doesn't succeed.
>> The thing about this whole code snippet is: I saw no reason to
>> reinvent the wheel and tinker on my own solution when there is code
>> that does the job in a clean way. I thought this could maybe be moved
>> to a general ARM code base when more people rely on it.
>>
>
> Blindly copying code is reinventing the wheel. You are making it appear
> that this is needed, but don't seem to have any reason why other than
> OMAP did it. If it is in fact needed, then it should be common.
>
> Use cpu_do_idle or figure out and explain why you can't. It's important
> to know that if we do go and consolidate this code later.

Yes after thinking about it a bit, I must completely agree to you.
Copying OMAP's code was done because of hesitation and uncertainty
that I could miss something and brings the drawbacks that you
mentioned. It's much straighter to go with cpu_do_idle.

Thanks!

 Bastian


> Rob
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2012-12-17 11:01 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-06 12:08 [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags Bastian Hecht
2012-12-06 12:08 ` Bastian Hecht
2012-12-06 12:08 ` [PATCH 2/2] ARM: SH-Mobile: sh73a0: Add CPU Hotplug Bastian Hecht
2012-12-06 12:08   ` Bastian Hecht
2012-12-14  3:21   ` Magnus Damm
2012-12-14  3:21     ` Magnus Damm
2012-12-14 14:17   ` Rob Herring
2012-12-14 14:17     ` Rob Herring
2012-12-14 15:33     ` Bastian Hecht
2012-12-14 15:33       ` Bastian Hecht
2012-12-14 18:08       ` Rob Herring
2012-12-14 18:08         ` Rob Herring
2012-12-17 11:01         ` Bastian Hecht
2012-12-17 11:01           ` Bastian Hecht
2012-12-14  3:20 ` [PATCH 1/2] ARM: SH-Mobile: sh73a0: Secondary CPUs handle own SCU flags Magnus Damm
2012-12-14  3:20   ` Magnus Damm
2012-12-14 13:47   ` Simon Horman
2012-12-14 13:47     ` Simon Horman
2012-12-14 14:06 ` Rob Herring
2012-12-14 14:06   ` Rob Herring
2012-12-14 15:07   ` Bastian Hecht
2012-12-14 15:07     ` Bastian Hecht

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.