* [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1
@ 2013-07-23  3:31 Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array Nicolas Pitre
                   ` (12 more replies)
  0 siblings, 13 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

This is the initial posting of the b.L switcher patches for public review.
The tentative goal is to have those patches merged upstream during the
next (v3.12) merge window.

The rationale for this code can be found here:

   http://lwn.net/Articles/481055/

Here is the first part of the patch series with only the core patches
providing basic switcher functionality.  The second part consists of
optimizations to reduce switching latency as well as tracing
infrastructure support.  The third part adds switcher support to the
arm_big_little cpufreq driver.

To simplify the review process, only the first part is posted for
review at this time.  The first and second parts can still be found here:

   git://git.linaro.org/people/nico/linux iks

The first and second parts are meant to go through RMK's tree.
The third part should be merged via the cpufreq tree once at least
the first part has been accepted for mainline inclusion.

As the diffstat shows, this code is very much self-contained and doesn't
really intrude into core parts of the kernel.


 arch/arm/Kconfig                       |  25 ++
 arch/arm/common/Makefile               |   2 +
 arch/arm/common/bL_switcher.c          | 611 +++++++++++++++++++++++++++
 arch/arm/common/bL_switcher_dummy_if.c |  71 ++++
 arch/arm/include/asm/bL_switcher.h     |  17 +
 arch/arm/kernel/sleep.S                |  25 +-
 arch/arm/kernel/suspend.c              |   8 +-
 drivers/irqchip/irq-gic.c              | 102 ++++-
 include/linux/irqchip/arm-gic.h        |   5 +
 9 files changed, 846 insertions(+), 20 deletions(-)


Nicolas


* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-24 16:47   ` Dave Martin
  2013-07-29 11:50   ` Lorenzo Pieralisi
  2013-07-23  3:31 ` [PATCH 02/13] ARM: gic: add CPU migration support Nicolas Pitre
                   ` (11 subsequent siblings)
  12 siblings, 2 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

Currently we hash the MPIDR of the CPU being suspended to determine which
entry in the sleep_save_sp array to use. In some situations, such as when
we want to resume on another physical CPU, the MPIDR of another CPU should
be used instead.

So let's use the value of cpu_logical_map(smp_processor_id()) in place
of the MPIDR in the suspend path.  This will result in the same index
being used as with the previous code unless the caller has modified
cpu_logical_map() beforehand.
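
For illustration, here is a rough C-level sketch of the change (the real
work happens in assembly in __cpu_suspend; hash_of() and save_slot[] are
stand-ins for the compute_mpidr_hash macro and the sleep_save_sp area,
not actual kernel symbols):

    extern unsigned int hash_of(unsigned int mpidr);  /* stand-in */
    extern void *save_slot[];                         /* stand-in */

    /* before this patch: the slot was picked from the hardware MPIDR */
    static void pick_slot_old(void *state)
    {
            save_slot[hash_of(read_cpuid_mpidr())] = state;
    }

    /* after this patch: the slot is picked from the cpu_logical_map()
     * value, which the caller (e.g. the b.L switcher) may have pointed
     * at another physical CPU before suspending */
    static void pick_slot_new(void *state)
    {
            save_slot[hash_of(cpu_logical_map(smp_processor_id()))] = state;
    }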

The register allocation in __cpu_suspend is reworked in order to better
accommodate the additional argument.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/kernel/sleep.S   | 25 +++++++++++--------------
 arch/arm/kernel/suspend.c |  8 +++++---
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index db1536b8b3..836d10e698 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -55,6 +55,7 @@
  * specific registers and some other data for resume.
  *  r0 = suspend function arg0
  *  r1 = suspend function
+ *  r2 = MPIDR value used to index into sleep_save_sp
  */
 ENTRY(__cpu_suspend)
 	stmfd	sp!, {r4 - r11, lr}
@@ -67,23 +68,19 @@ ENTRY(__cpu_suspend)
 	mov	r5, sp			@ current virtual SP
 	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
 	sub	sp, sp, r4		@ allocate CPU state on stack
-	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
-	add	r0, sp, #8		@ save pointer to save block
-	mov	r1, r4			@ size of save block
-	mov	r2, r5			@ virtual SP
 	ldr	r3, =sleep_save_sp
+	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
 	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
-	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
-        ALT_UP_B(1f)
-	ldr	r8, =mpidr_hash
-	/*
-	 * This ldmia relies on the memory layout of the mpidr_hash
-	 * struct mpidr_hash.
-	 */
-	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
-	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
-	add	r3, r3, lr, lsl #2
+	ALT_SMP(ldr r0, =mpidr_hash)
+       ALT_UP_B(1f)
+	/* This ldmia relies on the memory layout of the mpidr_hash struct */
+	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
+	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
+	add	r3, r3, r0, lsl #2
 1:
+	mov	r2, r5			@ virtual SP
+	mov	r1, r4			@ size of save block
+	add	r0, sp, #8		@ pointer to save block
 	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index 41cf3cbf75..2835d35234 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -10,7 +10,7 @@
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 
-extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
+extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
 extern void cpu_resume_mmu(void);
 
 #ifdef CONFIG_MMU
@@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void);
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
 	struct mm_struct *mm = current->active_mm;
+	u32 __mpidr = cpu_logical_map(smp_processor_id());
 	int ret;
 
 	if (!idmap_pgd)
@@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	 * resume (indicated by a zero return code), we need to switch
 	 * back to the correct page tables.
 	 */
-	ret = __cpu_suspend(arg, fn);
+	ret = __cpu_suspend(arg, fn, __mpidr);
 	if (ret == 0) {
 		cpu_switch_mm(mm->pgd, mm);
 		local_flush_bp_all();
@@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 #else
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
-	return __cpu_suspend(arg, fn);
+	u32 __mpidr = cpu_logical_map(smp_processor_id());
+	return __cpu_suspend(arg, fn, __mpidr);
 }
 #define	idmap_pgd	NULL
 #endif
-- 
1.8.1.2


* [PATCH 02/13] ARM: gic: add CPU migration support
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-25 11:44   ` Jonathan Austin
  2013-07-23  3:31 ` [PATCH 03/13] ARM: b.L: core switcher code Nicolas Pitre
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

This is required by the big.LITTLE switcher code.

gic_migrate_target() changes the CPU interface mapping for the
current CPU to redirect SGIs to the specified interface, and it also
updates the target CPU for each interrupt that was targeting the
current interface so that it points to the new CPU interface.  Finally,
pending SGIs for the current CPU are forwarded to the new interface.

Because Linux does not use it, the SGI source information for the
forwarded SGIs is not preserved.  Neither is the source information
for the SGIs sent by the current CPU to other CPUs adjusted to match
the new CPU interface mapping.  The required registers are banked so
only the target CPU could do it.
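
As an aside, the per-interrupt retargeting in this patch relies on a byte
rotation trick.  Below is a standalone sketch of that logic; ror32() is a
local stand-in for the kernel helper of the same name in <linux/bitops.h>:

    #include <stdint.h>

    /* local stand-in for the kernel's ror32() */
    static uint32_t ror32(uint32_t word, unsigned int shift)
    {
            return shift ? (word >> shift) | (word << (32 - shift)) : word;
    }

    /*
     * One GIC_DIST_TARGET word covers 4 interrupts, one target byte each.
     * Every byte that has the old CPU interface bit set gets that bit
     * moved to the new interface's position; other bytes are untouched.
     */
    static uint32_t retarget_word(uint32_t val, unsigned int old_id,
                                  unsigned int new_id)
    {
            uint32_t old_mask = 0x01010101u << old_id;
            uint32_t active = val & old_mask;
            unsigned int ror_val = (old_id - new_id) & 31;

            val &= ~active;                 /* clear the old interface bit */
            val |= ror32(active, ror_val);  /* set the new interface bit   */
            return val;
    }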

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 81 +++++++++++++++++++++++++++++++++++++++--
 include/linux/irqchip/arm-gic.h |  4 ++
 2 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index ee7c503120..6bd5a8c1aa 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
+	raw_spin_lock(&irq_controller_lock);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
-
-	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	raw_spin_unlock(&irq_controller_lock);
@@ -646,7 +645,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
 	int cpu;
-	unsigned long map = 0;
+	unsigned long flags, map = 0;
+
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
 	/* Convert our logical CPU mask into a physical one. */
 	for_each_cpu(cpu, mask)
@@ -660,6 +661,80 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+#endif
+
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_migrate_target - migrate IRQs to another CPU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id.  The CPU interface mapping
+ * is also updated.  Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+	unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
+	void __iomem *dist_base;
+	int i, ror_val, cpu = smp_processor_id();
+	u32 val, old_mask, active_mask;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+	if (!dist_base)
+		return;
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+
+	old_cpu_id = __ffs(gic_cpu_map[cpu]);
+	old_mask = 0x01010101 << old_cpu_id;
+	ror_val = (old_cpu_id - new_cpu_id) & 31;
+
+	raw_spin_lock(&irq_controller_lock);
+
+	gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+		active_mask = val & old_mask;
+		if (active_mask) {
+			val &= ~active_mask;
+			val |= ror32(active_mask, ror_val);
+			writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4);
+		}
+	}
+
+	raw_spin_unlock(&irq_controller_lock);
+
+	/*
+	 * Now let's migrate and clear any potential SGIs that might be
+	 * pending for us (old_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
+	 * is a banked register, we can only forward the SGI using
+	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
+	 * doesn't use that information anyway.
+	 *
+	 * For the same reason we do not adjust SGI source information
+	 * for previously sent SGIs by us to other CPUs either.
+	 */
+	for (i = 0; i < 16; i += 4) {
+		int j;
+		val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+		if (!val)
+			continue;
+		writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+		for (j = i; j < i + 4; j++) {
+			if (val & 0xff)
+				writel_relaxed((1 << (new_cpu_id + 16)) | j,
+						dist_base + GIC_DIST_SOFTINT);
+			val >>= 8;
+		}
+	}
 }
 #endif
 
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb23c..40bfcac959 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -31,6 +31,8 @@
 #define GIC_DIST_TARGET			0x800
 #define GIC_DIST_CONFIG			0xc00
 #define GIC_DIST_SOFTINT		0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR	0xf10
+#define GIC_DIST_SGI_PENDING_SET	0xf20
 
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
@@ -73,6 +75,8 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+void gic_migrate_target(unsigned int new_cpu_id);
+
 #endif /* __ASSEMBLY */
 
 #endif
-- 
1.8.1.2


* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 02/13] ARM: gic: add CPU migration support Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-25 17:15   ` Jonathan Austin
                     ` (2 more replies)
  2013-07-23  3:31 ` [PATCH 04/13] ARM: bL_switcher: add clockevent save/restore support Nicolas Pitre
                   ` (9 subsequent siblings)
  12 siblings, 3 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

This is the core code implementing big.LITTLE switcher functionality.
Rationale for this code is available here:

http://lwn.net/Articles/481055/

The main entry point for a switch request is:

void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)

If the calling CPU is not the wanted one, this wrapper takes care of
sending the request to the appropriate CPU with schedule_work_on().

At the moment the core switch operation is handled by bL_switch_to()
which must be called on the CPU for which a switch is requested.

What this code does:

  * Return early if the current cluster is the wanted one.

  * Close the gate in the kernel entry vector for both the inbound
    and outbound CPUs.

  * Wake up the inbound CPU so it can perform its reset sequence in
    parallel up to the kernel entry vector gate.

  * Migrate all interrupts in the GIC targeting the outbound CPU
    interface to the inbound CPU interface, including SGIs. This is
    performed by gic_migrate_target() in drivers/irqchip/irq-gic.c.

  * Call cpu_pm_enter() which takes care of flushing the VFP state to
    RAM and saving the CPU interface config from the GIC to RAM.

  * Modify the cpu_logical_map to refer to the inbound physical CPU.

  * Call cpu_suspend() which saves the CPU state (general-purpose
    registers, page table address) onto the stack, stores the
    resulting stack pointer in an array indexed by the updated
    cpu_logical_map, then calls the provided shutdown function.
    This happens in arch/arm/kernel/sleep.S.

At this point, the provided shutdown function executed by the outbound
CPU ungates the inbound CPU. Therefore the inbound CPU:

  * Picks up the saved stack pointer in the array indexed by its MPIDR
    in arch/arm/kernel/sleep.S.

  * The MMU and caches are re-enabled using the saved state on the
    provided stack, just as if this were a resume operation from a
    suspended state.

  * Then cpu_suspend() returns, although this is on the inbound CPU
    rather than the outbound CPU which called it initially.

  * The function cpu_pm_exit() is called, whose effect is to restore the
    CPU interface state in the GIC using the state previously saved by
    the outbound CPU.

  * bL_switch_to() exits and normal kernel execution resumes on the
    new CPU.

However, the outbound CPU is potentially still running in parallel while
the inbound CPU is resuming normal kernel execution, hence we need
per-CPU stack isolation to execute bL_do_switch().  After the outbound
CPU has ungated the inbound CPU, it calls mcpm_cpu_power_down() to:

  * Clean its L1 cache.

  * If it is the last CPU still alive in its cluster (last man standing),
    it also cleans its L2 cache and disables cache snooping from the other
    cluster.

  * Power down the CPU (or whole cluster).

Code called from bL_do_switch() might end up referencing 'current' for
some reason.  However, 'current' is derived from the stack pointer.
With an arbitrary stack, the returned value for 'current' and any
values dereferenced through it are just random garbage which may lead to
segmentation faults.
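
For reference, this is roughly (and abridged) how the ARM kernel derives
'current' from the stack pointer, which is why running on an arbitrary
stack turns 'current' into garbage:

    /* approximate 3.x ARM definitions, abridged for illustration */
    static inline struct thread_info *current_thread_info(void)
    {
            register unsigned long sp asm ("sp");
            return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
    }
    #define current (current_thread_info()->task)

If sp does not point inside a properly aligned THREAD_SIZE stack owned by
a real task, ->task is read from random memory.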

The active page table during the execution of bL_do_switch() is also a
problem.  There is no guarantee that the inbound CPU won't destroy the
corresponding task which would free the attached page table while the
outbound CPU is still running and relying on it.

To solve both issues, we borrow some of the stack space belonging to
the init/idle task which, by its nature, is lightly used and therefore
is unlikely to clash with our usage.  The init task is also never going
away.

Right now the logical CPU number is assumed to be equivalent to the
physical CPU number within each cluster. The kernel should also be
booted with only one cluster active.  These limitations will be lifted
eventually.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/Kconfig                   |  17 +++
 arch/arm/common/Makefile           |   1 +
 arch/arm/common/bL_switcher.c      | 247 +++++++++++++++++++++++++++++++++++++
 arch/arm/include/asm/bL_switcher.h |  17 +++
 4 files changed, 282 insertions(+)
 create mode 100644 arch/arm/common/bL_switcher.c
 create mode 100644 arch/arm/include/asm/bL_switcher.h

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ba412e02ec..2c9e5bf734 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1538,6 +1538,23 @@ config MCPM
 	  for (multi-)cluster based systems, such as big.LITTLE based
 	  systems.
 
+config BIG_LITTLE
+	bool "big.LITTLE support (Experimental)"
+	depends on CPU_V7 && SMP
+	select MCPM
+	help
+	  This option enables support for the big.LITTLE architecture.
+
+config BL_SWITCHER
+	bool "big.LITTLE switcher support"
+	depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+	select CPU_PM
+	select ARM_CPU_SUSPEND
+	help
+	  The big.LITTLE "switcher" provides the core functionality to
+	  transparently handle transition between a cluster of A15's
+	  and a cluster of A7's in a big.LITTLE system.
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 8c60f473e9..2586240d5a 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
 AFLAGS_mcpm_head.o		:= -march=armv7-a
 AFLAGS_vlock.o			:= -march=armv7-a
 obj-$(CONFIG_TI_PRIV_EDMA)	+= edma.o
+obj-$(CONFIG_BL_SWITCHER)	+= bL_switcher.o
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 0000000000..e63881b430
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,247 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by:	Nicolas Pitre, March 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here as the value _does_ change along some code paths.
+ */
+
+static int read_mpidr(void)
+{
+	unsigned int id;
+	asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
+	return id & MPIDR_HWID_BITMASK;
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_unused)
+{
+	unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
+
+	/*
+	 * We now have a piece of stack borrowed from the init task's.
+	 * Let's also switch to init_mm right away to match it.
+	 */
+	cpu_switch_mm(init_mm.pgd, &init_mm);
+
+	pr_debug("%s\n", __func__);
+
+	mpidr = read_mpidr();
+	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	ob_cluster = clusterid;
+	ib_cluster = clusterid ^ 1;
+
+	/*
+	 * Our state has been saved at this point.  Let's release our
+	 * inbound CPU.
+	 */
+	mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
+	sev();
+
+	/*
+	 * From this point, we must assume that our counterpart CPU might
+	 * have taken over in its parallel world already, as if execution
+	 * just returned from cpu_suspend().  It is therefore important to
+	 * be very careful not to make any change the other guy is not
+	 * expecting.  This is why we need stack isolation.
+	 *
+	 * Fancy under cover tasks could be performed here.  For now
+	 * we have none.
+	 */
+
+	/* Let's put ourself down. */
+	mcpm_cpu_power_down();
+
+	/* should never get here */
+	BUG();
+}
+
+/*
+ * Stack isolation.  To ensure 'current' remains valid, we just borrow
+ * a slice of the init/idle task which should be fairly lightly used.
+ * The borrowed area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line.
+ */
+#define STACK_SIZE 256
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+	unsigned int mpidr = read_mpidr();
+	unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER;
+	void *stack = &init_thread_info + 1;
+	stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+	stack += cpu_index * STACK_SIZE + STACK_SIZE;
+	call_with_stack(bL_do_switch, (void *)_arg, stack);
+	BUG();
+}
+
+/*
+ * Generic switcher interface
+ */
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+	unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
+	int ret;
+
+	mpidr = read_mpidr();
+	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	ob_cluster = clusterid;
+	ib_cluster = clusterid ^ 1;
+
+	if (new_cluster_id == clusterid)
+		return 0;
+
+	pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
+
+	/* Close the gate for our entry vectors */
+	mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
+	mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
+
+	/*
+	 * Let's wake up the inbound CPU now in case it requires some delay
+	 * to come online, but leave it gated in our entry vector code.
+	 */
+	ret = mcpm_cpu_power_up(cpuid, ib_cluster);
+	if (ret) {
+		pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+		return ret;
+	}
+
+	/*
+	 * From this point we are entering the switch critical zone
+	 * and can't sleep/schedule anymore.
+	 */
+	local_irq_disable();
+	local_fiq_disable();
+
+	this_cpu = smp_processor_id();
+
+	/* redirect GIC's SGIs to our counterpart */
+	gic_migrate_target(cpuid + ib_cluster*4);
+
+	/*
+	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
+	 * in a possible WFI, such as in mcpm_power_down().
+	 */
+	arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
+
+	ret = cpu_pm_enter();
+
+	/* we can not tolerate errors at this point */
+	if (ret)
+		panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+	/*
+	 * Flip the cluster in the CPU logical map for this CPU.
+	 * This must be flushed to RAM as the resume code
+	 * needs to access it while the caches are still disabled.
+	 */
+	cpu_logical_map(this_cpu) ^= (1 << 8);
+	sync_cache_w(&cpu_logical_map(this_cpu));
+
+	/* Let's do the actual CPU switch. */
+	ret = cpu_suspend(0, bL_switchpoint);
+	if (ret > 0)
+		panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+	/* We are executing on the inbound CPU at this point */
+	mpidr = read_mpidr();
+	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid);
+	BUG_ON(clusterid != ib_cluster);
+
+	mcpm_cpu_powered_up();
+
+	ret = cpu_pm_exit();
+
+	local_fiq_enable();
+	local_irq_enable();
+
+	if (ret)
+		pr_err("%s exiting with error %d\n", __func__, ret);
+	return ret;
+}
+
+struct switch_args {
+	unsigned int cluster;
+	struct work_struct work;
+};
+
+static void __bL_switch_to(struct work_struct *work)
+{
+	struct switch_args *args = container_of(work, struct switch_args, work);
+	bL_switch_to(args->cluster);
+}
+
+/*
+ * bL_switch_request - Switch to a specific cluster for the given CPU
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function causes a cluster switch on the given CPU.  If the given
+ * CPU is the same as the calling CPU then the switch happens right away.
+ * Otherwise the request is put on a work queue to be scheduled on the
+ * remote CPU.
+ */
+void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+	unsigned int this_cpu = get_cpu();
+	struct switch_args args;
+
+	if (cpu == this_cpu) {
+		bL_switch_to(new_cluster_id);
+		put_cpu();
+		return;
+	}
+	put_cpu();
+
+	args.cluster = new_cluster_id;
+	INIT_WORK_ONSTACK(&args.work, __bL_switch_to);
+	schedule_work_on(cpu, &args.work);
+	flush_work(&args.work);
+}
+EXPORT_SYMBOL_GPL(bL_switch_request);
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
new file mode 100644
index 0000000000..72efe3f349
--- /dev/null
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -0,0 +1,17 @@
+/*
+ * arch/arm/include/asm/bL_switcher.h
+ *
+ * Created by:  Nicolas Pitre, April 2012
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
+
+#endif
-- 
1.8.1.2


* [PATCH 04/13] ARM: bL_switcher: add clockevent save/restore support
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (2 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 03/13] ARM: b.L: core switcher code Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues Nicolas Pitre
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

Per-CPU timers that are shut down when a CPU is switched over must be
disabled upon switching and reprogrammed on the inbound CPU by relying
on the clock events management API.  The save/restore sequence is
executed with irqs disabled as mandated by the clock events API.

The next_event is an absolute time, hence, when the inbound CPU resumes,
if the timer has expired the min delta is forced into the tick device so
that it fires after a few cycles.
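
For reference, the re-arming call used in the resume path below is:

    /* Re-arm with the saved absolute expiry.  The last argument ("force")
     * makes clockevents_program_event() program at least the device's
     * minimum delta when next_event is already in the past, so the
     * inbound CPU still gets a timer interrupt shortly after resuming. */
    clockevents_program_event(tdev->evtdev, tdev->evtdev->next_event, 1);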

This patch adds switching support for clock events that are per-CPU and
have to be migrated when a switch takes place; the cpumask of the clock
event device is checked against the cpumask of the current CPU, and if
they match, the clockevent device mode is saved and the device is put
into shutdown mode.  Resume code reprograms the tick device accordingly.

Tested on A15/A7 fast models and architected timers.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index e63881b430..d6f7e507d9 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -15,7 +15,11 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/cpu_pm.h>
+#include <linux/cpumask.h>
 #include <linux/workqueue.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/irqchip/arm-gic.h>
@@ -122,6 +126,8 @@ static int bL_switchpoint(unsigned long _arg)
 static int bL_switch_to(unsigned int new_cluster_id)
 {
 	unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
+	struct tick_device *tdev;
+	enum clock_event_mode tdev_mode;
 	int ret;
 
 	mpidr = read_mpidr();
@@ -167,6 +173,14 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	 */
 	arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
 
+	tdev = tick_get_device(this_cpu);
+	if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+		tdev = NULL;
+	if (tdev) {
+		tdev_mode = tdev->evtdev->mode;
+		clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+	}
+
 	ret = cpu_pm_enter();
 
 	/* we can not tolerate errors at this point */
@@ -197,6 +211,12 @@ static int bL_switch_to(unsigned int new_cluster_id)
 
 	ret = cpu_pm_exit();
 
+	if (tdev) {
+		clockevents_set_mode(tdev->evtdev, tdev_mode);
+		clockevents_program_event(tdev->evtdev,
+					  tdev->evtdev->next_event, 1);
+	}
+
 	local_fiq_enable();
 	local_irq_enable();
 
-- 
1.8.1.2


* [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (3 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 04/13] ARM: bL_switcher: add clockevent save/restore support Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-26 15:18   ` Lorenzo Pieralisi
  2013-07-23  3:31 ` [PATCH 06/13] ARM: bL_switcher: simplify stack isolation Nicolas Pitre
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

Workqueues are problematic as they may be contended, and they can't be
scheduled with top priority either.  Also, the optimization in
bL_switch_request() that skipped the workqueue entirely when the target
CPU and the calling CPU were the same didn't allow bL_switch_request()
to be called from atomic context, as might be the case for some cpufreq
drivers.

Let's move to dedicated kthreads instead.
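
As a purely hypothetical caller sketch (not part of this series): with the
kthread model, bL_switch_request() only records the wanted cluster and
wakes the per-CPU switcher thread, so e.g. a cpufreq transition hook could
invoke it without sleeping:

    /* hypothetical example caller, for illustration only */
    static void my_request_cluster(unsigned int cpu, unsigned int cluster)
    {
            /* does not sleep: just wakes the switcher kthread; the actual
             * switch then completes asynchronously on that CPU */
            if (bL_switch_request(cpu, cluster))
                    pr_warn("cluster switch request for CPU %u failed\n", cpu);
    }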

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c      | 101 ++++++++++++++++++++++++++++---------
 arch/arm/include/asm/bL_switcher.h |   2 +-
 2 files changed, 79 insertions(+), 24 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index d6f7e507d9..c2355cafc9 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -15,8 +15,10 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/cpu_pm.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
-#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
 #include <linux/clockchips.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
@@ -225,15 +227,48 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	return ret;
 }
 
-struct switch_args {
-	unsigned int cluster;
-	struct work_struct work;
+struct bL_thread {
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	int wanted_cluster;
 };
 
-static void __bL_switch_to(struct work_struct *work)
+static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
+
+static int bL_switcher_thread(void *arg)
+{
+	struct bL_thread *t = arg;
+	struct sched_param param = { .sched_priority = 1 };
+	int cluster;
+
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+
+	do {
+		if (signal_pending(current))
+			flush_signals(current);
+		wait_event_interruptible(t->wq,
+				t->wanted_cluster != -1 ||
+				kthread_should_stop());
+		cluster = xchg(&t->wanted_cluster, -1);
+		if (cluster != -1)
+			bL_switch_to(cluster);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+static struct task_struct * __init bL_switcher_thread_create(int cpu, void *arg)
 {
-	struct switch_args *args = container_of(work, struct switch_args, work);
-	bL_switch_to(args->cluster);
+	struct task_struct *task;
+
+	task = kthread_create_on_node(bL_switcher_thread, arg,
+				      cpu_to_node(cpu), "kswitcher_%d", cpu);
+	if (!IS_ERR(task)) {
+		kthread_bind(task, cpu);
+		wake_up_process(task);
+	} else
+		pr_err("%s failed for CPU %d\n", __func__, cpu);
+	return task;
 }
 
 /*
@@ -242,26 +277,46 @@ static void __bL_switch_to(struct work_struct *work)
  * @cpu: the CPU to switch
  * @new_cluster_id: the ID of the cluster to switch to.
  *
- * This function causes a cluster switch on the given CPU.  If the given
- * CPU is the same as the calling CPU then the switch happens right away.
- * Otherwise the request is put on a work queue to be scheduled on the
- * remote CPU.
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread.  This function may or may not return
+ * before the switch has occurred.
  */
-void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
 {
-	unsigned int this_cpu = get_cpu();
-	struct switch_args args;
+	struct bL_thread *t;
 
-	if (cpu == this_cpu) {
-		bL_switch_to(new_cluster_id);
-		put_cpu();
-		return;
+	if (cpu >= MAX_CPUS_PER_CLUSTER) {
+		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+		return -EINVAL;
 	}
-	put_cpu();
 
-	args.cluster = new_cluster_id;
-	INIT_WORK_ONSTACK(&args.work, __bL_switch_to);
-	schedule_work_on(cpu, &args.work);
-	flush_work(&args.work);
+	t = &bL_threads[cpu];
+	if (IS_ERR(t->task))
+		return PTR_ERR(t->task);
+	if (!t->task)
+		return -ESRCH;
+
+	t->wanted_cluster = new_cluster_id;
+	wake_up(&t->wq);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(bL_switch_request);
+
+static int __init bL_switcher_init(void)
+{
+	int cpu;
+
+	pr_info("big.LITTLE switcher initializing\n");
+
+	for_each_online_cpu(cpu) {
+		struct bL_thread *t = &bL_threads[cpu];
+		init_waitqueue_head(&t->wq);
+		t->wanted_cluster = -1;
+		t->task = bL_switcher_thread_create(cpu, t);
+	}
+
+	pr_info("big.LITTLE switcher initialized\n");
+	return 0;
+}
+
+late_initcall(bL_switcher_init);
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index 72efe3f349..e0c0bba70b 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -12,6 +12,6 @@
 #ifndef ASM_BL_SWITCHER_H
 #define ASM_BL_SWITCHER_H
 
-void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
+int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
 
 #endif
-- 
1.8.1.2


* [PATCH 06/13] ARM: bL_switcher: simplify stack isolation
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (4 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 07/13] ARM: bL_switcher: hot-unplug half of the available CPUs Nicolas Pitre
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

We now have a dedicated thread for each logical CPU.  That's plenty
of stack space for our needs.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index c2355cafc9..3ff7bfe126 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -54,12 +54,6 @@ static void bL_do_switch(void *_unused)
 {
 	unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
 
-	/*
-	 * We now have a piece of stack borrowed from the init task's.
-	 * Let's also switch to init_mm right away to match it.
-	 */
-	cpu_switch_mm(init_mm.pgd, &init_mm);
-
 	pr_debug("%s\n", __func__);
 
 	mpidr = read_mpidr();
@@ -94,22 +88,21 @@ static void bL_do_switch(void *_unused)
 }
 
 /*
- * Stack isolation.  To ensure 'current' remains valid, we just borrow
- * a slice of the init/idle task which should be fairly lightly used.
- * The borrowed area starts just above the thread_info structure located
- * at the very bottom of the stack, aligned to a cache line.
+ * Stack isolation.  To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
  */
-#define STACK_SIZE 256
+#define STACK_SIZE 512
 extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
 static int bL_switchpoint(unsigned long _arg)
 {
 	unsigned int mpidr = read_mpidr();
-	unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 	unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-	unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER;
-	void *stack = &init_thread_info + 1;
+	void *stack = current_thread_info() + 1;
 	stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
-	stack += cpu_index * STACK_SIZE + STACK_SIZE;
+	stack += clusterid * STACK_SIZE + STACK_SIZE;
 	call_with_stack(bL_do_switch, (void *)_arg, stack);
 	BUG();
 }
-- 
1.8.1.2


* [PATCH 07/13] ARM: bL_switcher: hot-unplug half of the available CPUs
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (5 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 06/13] ARM: bL_switcher: simplify stack isolation Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 08/13] ARM: bL_switcher: do not hardcode GIC IDs in the code Nicolas Pitre
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

In a regular kernel configuration, all the CPUs are initially available.
But the switcher execution model uses half of them at any time.  Instead
of hacking the DTB to remove half of the CPUs, let's remove them at
run time and make sure we still have a working switcher configuration.
This way, the same DTB can be used whether or not the switcher is used.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 78 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 3ff7bfe126..4273be76ba 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -295,18 +295,94 @@ int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
 }
 EXPORT_SYMBOL_GPL(bL_switch_request);
 
+/*
+ * Activation and configuration code.
+ */
+
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+static void __init bL_switcher_restore_cpus(void)
+{
+	int i;
+
+	for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+		cpu_up(i);
+}
+
+static int __init bL_switcher_halve_cpus(void)
+{
+	int cpu, cluster, i, ret;
+	cpumask_t cluster_mask[2], common_mask;
+
+	cpumask_clear(&bL_switcher_removed_logical_cpus);
+	cpumask_clear(&cluster_mask[0]);
+	cpumask_clear(&cluster_mask[1]);
+
+	for_each_online_cpu(i) {
+		cpu = cpu_logical_map(i) & 0xff;
+		cluster = (cpu_logical_map(i) >> 8) & 0xff;
+		if (cluster >= 2) {
+			pr_err("%s: only dual cluster systems are supported\n", __func__);
+			return -EINVAL;
+		}
+		cpumask_set_cpu(cpu, &cluster_mask[cluster]);
+	}
+
+	if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
+		pr_err("%s: no common set of CPUs\n", __func__);
+		return -EINVAL;
+	}
+
+	for_each_online_cpu(i) {
+		cpu = cpu_logical_map(i) & 0xff;
+		cluster = (cpu_logical_map(i) >> 8) & 0xff;
+
+		if (cpumask_test_cpu(cpu, &common_mask)) {
+			/*
+			 * We keep only those logical CPUs which number
+			 * is equal to their physical CPU number. This is
+			 * not perfect but good enough for now.
+			 */
+			if (cpu == i)
+				continue;
+		}
+
+		ret = cpu_down(i);
+		if (ret) {
+			bL_switcher_restore_cpus();
+			return ret;
+		}
+		cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+	}
+
+	return 0;
+}
+
 static int __init bL_switcher_init(void)
 {
-	int cpu;
+	int cpu, ret;
 
 	pr_info("big.LITTLE switcher initializing\n");
 
+	if (MAX_NR_CLUSTERS != 2) {
+		pr_err("%s: only dual cluster systems are supported\n", __func__);
+		return -EINVAL;
+	}
+
+	cpu_hotplug_driver_lock();
+	ret = bL_switcher_halve_cpus();
+	if (ret) {
+		cpu_hotplug_driver_unlock();
+		return ret;
+	}
+
 	for_each_online_cpu(cpu) {
 		struct bL_thread *t = &bL_threads[cpu];
 		init_waitqueue_head(&t->wq);
 		t->wanted_cluster = -1;
 		t->task = bL_switcher_thread_create(cpu, t);
 	}
+	cpu_hotplug_driver_unlock();
 
 	pr_info("big.LITTLE switcher initialized\n");
 	return 0;
-- 
1.8.1.2


* [PATCH 08/13] ARM: bL_switcher: do not hardcode GIC IDs in the code
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (6 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 07/13] ARM: bL_switcher: hot-unplug half of the available CPUs Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 09/13] ARM: bL_switcher: ability to enable and disable the switcher via sysfs Nicolas Pitre
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

Currently, GIC IDs are hardcoded, making the code dependent on the 4+4 b.L
configuration.  Let's allow for GIC IDs to be discovered upon switcher
initialization to support other b.L configurations such as the 1+1 one,
or 2+3 as on the VExpress TC2.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c   | 14 +++++++++++++-
 drivers/irqchip/irq-gic.c       | 21 +++++++++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 4273be76ba..34eff3bc25 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -111,6 +111,8 @@ static int bL_switchpoint(unsigned long _arg)
  * Generic switcher interface
  */
 
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+
 /*
  * bL_switch_to - Switch to a specific cluster for the current CPU
  * @new_cluster_id: the ID of the cluster to switch to.
@@ -160,7 +162,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	this_cpu = smp_processor_id();
 
 	/* redirect GIC's SGIs to our counterpart */
-	gic_migrate_target(cpuid + ib_cluster*4);
+	gic_migrate_target(bL_gic_id[cpuid][ib_cluster]);
 
 	/*
 	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
@@ -338,6 +340,16 @@ static int __init bL_switcher_halve_cpus(void)
 		cluster = (cpu_logical_map(i) >> 8) & 0xff;
 
 		if (cpumask_test_cpu(cpu, &common_mask)) {
+			/* Let's take note of the GIC ID for this CPU */
+			int gic_id = gic_get_cpu_id(i);
+			if (gic_id < 0) {
+				pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+				return -EINVAL;
+			}
+			bL_gic_id[cpu][cluster] = gic_id;
+			pr_info("GIC ID for CPU %u cluster %u is %u\n",
+				cpu, cluster, gic_id);
+
 			/*
 			 * We keep only those logical CPUs which number
 			 * is equal to their physical CPU number. This is
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 6bd5a8c1aa..0b6b8bf5df 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -668,6 +668,27 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 #ifdef CONFIG_BL_SWITCHER
 /*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (more than one bit set).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+	unsigned int cpu_bit;
+
+	if (cpu >= NR_GIC_CPU_IF)
+		return -1;
+	cpu_bit = gic_cpu_map[cpu];
+	if (cpu_bit & (cpu_bit - 1))
+		return -1;
+	return __ffs(cpu_bit);
+}
+
+/*
  * gic_migrate_target - migrate IRQs to another CPU interface
  *
  * @new_cpu_id: the CPU target ID to migrate IRQs to
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 40bfcac959..2d7d47e8df 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -75,6 +75,7 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
 
 #endif /* __ASSEMBLY */
-- 
1.8.1.2


* [PATCH 09/13] ARM: bL_switcher: ability to enable and disable the switcher via sysfs
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (7 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 08/13] ARM: bL_switcher: do not hardcode GIC IDs in the code Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 10/13] ARM: bL_switcher: add kernel cmdline param to disable the switcher on boot Nicolas Pitre
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

The /sys/kernel/bL_switcher/active file allows the switcher to be
enabled or disabled by writing 1 or 0 to it respectively.  It is still
enabled by default on boot.
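
For example, from a shell:

    # disable the switcher and bring the removed CPUs back online
    echo 0 > /sys/kernel/bL_switcher/active
    # re-enable it (halving the CPUs again)
    echo 1 > /sys/kernel/bL_switcher/active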

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 171 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 160 insertions(+), 11 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 34eff3bc25..abeeff5520 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -24,6 +24,7 @@
 #include <linux/tick.h>
 #include <linux/mm.h>
 #include <linux/string.h>
+#include <linux/sysfs.h>
 #include <linux/irqchip/arm-gic.h>
 
 #include <asm/smp_plat.h>
@@ -226,6 +227,7 @@ struct bL_thread {
 	struct task_struct *task;
 	wait_queue_head_t wq;
 	int wanted_cluster;
+	struct completion started;
 };
 
 static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
@@ -237,6 +239,7 @@ static int bL_switcher_thread(void *arg)
 	int cluster;
 
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+	complete(&t->started);
 
 	do {
 		if (signal_pending(current))
@@ -252,7 +255,7 @@ static int bL_switcher_thread(void *arg)
 	return 0;
 }
 
-static struct task_struct * __init bL_switcher_thread_create(int cpu, void *arg)
+static struct task_struct *bL_switcher_thread_create(int cpu, void *arg)
 {
 	struct task_struct *task;
 
@@ -301,9 +304,11 @@ EXPORT_SYMBOL_GPL(bL_switch_request);
  * Activation and configuration code.
  */
 
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER];
 static cpumask_t bL_switcher_removed_logical_cpus;
 
-static void __init bL_switcher_restore_cpus(void)
+static void bL_switcher_restore_cpus(void)
 {
 	int i;
 
@@ -311,7 +316,7 @@ static void __init bL_switcher_restore_cpus(void)
 		cpu_up(i);
 }
 
-static int __init bL_switcher_halve_cpus(void)
+static int bL_switcher_halve_cpus(void)
 {
 	int cpu, cluster, i, ret;
 	cpumask_t cluster_mask[2], common_mask;
@@ -355,8 +360,10 @@ static int __init bL_switcher_halve_cpus(void)
 			 * is equal to their physical CPU number. This is
 			 * not perfect but good enough for now.
 			 */
-			if (cpu == i)
+			if (cpu == i) {
+				bL_switcher_cpu_original_cluster[cpu] = cluster;
 				continue;
+			}
 		}
 
 		ret = cpu_down(i);
@@ -370,18 +377,18 @@ static int __init bL_switcher_halve_cpus(void)
 	return 0;
 }
 
-static int __init bL_switcher_init(void)
+static int bL_switcher_enable(void)
 {
 	int cpu, ret;
 
-	pr_info("big.LITTLE switcher initializing\n");
-
-	if (MAX_NR_CLUSTERS != 2) {
-		pr_err("%s: only dual cluster systems are supported\n", __func__);
-		return -EINVAL;
+	cpu_hotplug_driver_lock();
+	if (bL_switcher_active) {
+		cpu_hotplug_driver_unlock();
+		return 0;
 	}
 
-	cpu_hotplug_driver_lock();
+	pr_info("big.LITTLE switcher initializing\n");
+
 	ret = bL_switcher_halve_cpus();
 	if (ret) {
 		cpu_hotplug_driver_unlock();
@@ -391,13 +398,155 @@ static int __init bL_switcher_init(void)
 	for_each_online_cpu(cpu) {
 		struct bL_thread *t = &bL_threads[cpu];
 		init_waitqueue_head(&t->wq);
+		init_completion(&t->started);
 		t->wanted_cluster = -1;
 		t->task = bL_switcher_thread_create(cpu, t);
 	}
+
+	bL_switcher_active = 1;
 	cpu_hotplug_driver_unlock();
 
 	pr_info("big.LITTLE switcher initialized\n");
 	return 0;
 }
 
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+	unsigned int cpu, cluster, i;
+	struct bL_thread *t;
+	struct task_struct *task;
+
+	cpu_hotplug_driver_lock();
+	if (!bL_switcher_active) {
+		cpu_hotplug_driver_unlock();
+		return;
+	}
+	bL_switcher_active = 0;
+
+	/*
+	 * To deactivate the switcher, we must shut down the switcher
+	 * threads to prevent any other requests from being accepted.
+	 * Then, if the final cluster for given logical CPU is not the
+	 * same as the original one, we'll recreate a switcher thread
+	 * just for the purpose of switching the CPU back without any
+	 * possibility for interference from external requests.
+	 */
+	for_each_online_cpu(cpu) {
+		BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff));
+		t = &bL_threads[cpu];
+		task = t->task;
+		t->task = NULL;
+		if (!task || IS_ERR(task))
+			continue;
+		kthread_stop(task);
+		/* no more switch may happen on this CPU at this point */
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+		if (cluster == bL_switcher_cpu_original_cluster[cpu])
+			continue;
+		init_completion(&t->started);
+		t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+		task = bL_switcher_thread_create(cpu, t);
+		if (!IS_ERR(task)) {
+			wait_for_completion(&t->started);
+			kthread_stop(task);
+			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+			if (cluster == bL_switcher_cpu_original_cluster[cpu])
+				continue;
+		}
+		/* If execution gets here, we're in trouble. */
+		pr_crit("%s: unable to restore original cluster for CPU %d\n",
+			__func__, cpu);
+		for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
+			if ((cpu_logical_map(i) & 0xff) != cpu)
+				continue;
+			pr_crit("%s: CPU %d can't be restored\n",
+				__func__, i);
+			cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus);
+			break;
+		}
+	}
+
+	bL_switcher_restore_cpus();
+	cpu_hotplug_driver_unlock();
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	switch (buf[0]) {
+	case '0':
+		bL_switcher_disable();
+		ret = 0;
+		break;
+	case '1':
+		ret = bL_switcher_enable();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return (ret >= 0) ? count : ret;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+	__ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+	&bL_switcher_active_attr.attr,
+	NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+	.attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+	int ret;
+
+	bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+	if (!bL_switcher_kobj)
+		return -ENOMEM;
+	ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+	if (ret)
+		kobject_put(bL_switcher_kobj);
+	return ret;
+}
+
+#endif  /* CONFIG_SYSFS */
+
+static int __init bL_switcher_init(void)
+{
+	int ret;
+
+	if (MAX_NR_CLUSTERS != 2) {
+		pr_err("%s: only dual cluster systems are supported\n", __func__);
+		return -EINVAL;
+	}
+
+	ret = bL_switcher_enable();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_SYSFS
+	ret = bL_switcher_sysfs_init();
+	if (ret)
+		pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+	return 0;
+}
+
 late_initcall(bL_switcher_init);
-- 
1.8.1.2


* [PATCH 10/13] ARM: bL_switcher: add kernel cmdline param to disable the switcher on boot
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (8 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 09/13] ARM: bL_switcher: ability to enable and disable the switcher via sysfs Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active Nicolas Pitre
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

By adding no_bL_switcher to the kernel cmdline string, the switcher
won't be activated automatically at boot time.  It is still possible
to activate it later with:

	echo 1 > /sys/kernel/bL_switcher/active

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index abeeff5520..704c4b4ef3 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -26,6 +26,7 @@
 #include <linux/string.h>
 #include <linux/sysfs.h>
 #include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
 
 #include <asm/smp_plat.h>
 #include <asm/cacheflush.h>
@@ -527,6 +528,9 @@ static int __init bL_switcher_sysfs_init(void)
 
 #endif  /* CONFIG_SYSFS */
 
+static bool no_bL_switcher;
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
 static int __init bL_switcher_init(void)
 {
 	int ret;
@@ -536,9 +540,11 @@ static int __init bL_switcher_init(void)
 		return -EINVAL;
 	}
 
-	ret = bL_switcher_enable();
-	if (ret)
-		return ret;
+	if (!no_bL_switcher) {
+		ret = bL_switcher_enable();
+		if (ret)
+			return ret;
+	}
 
 #ifdef CONFIG_SYSFS
 	ret = bL_switcher_sysfs_init();
-- 
1.8.1.2


* [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (9 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 10/13] ARM: bL_switcher: add kernel cmdline param to disable the switcher on boot Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-31 10:30   ` Lorenzo Pieralisi
  2013-07-23  3:31 ` [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs Nicolas Pitre
  2013-07-23  3:31 ` [PATCH 13/13] ARM: bL_switcher: add a simple /dev user interface for debugging purposes Nicolas Pitre
  12 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

Trying to support both the switcher and CPU hotplug at the same time
is quickly becoming very complex due to ambiguous semantics.  So let's
simply veto any hotplug requests when the switcher is active for now.

This restriction might be loosened eventually.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 704c4b4ef3..2fe3911601 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -528,6 +528,25 @@ static int __init bL_switcher_sysfs_init(void)
 
 #endif  /* CONFIG_SYSFS */
 
+/*
+ * Veto any CPU hotplug operation while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_DOWN_PREPARE:
+		if (bL_switcher_active)
+			return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block bL_switcher_hotplug_notifier =
+	{ &bL_switcher_hotplug_callback, NULL, 0 };
+
 static bool no_bL_switcher;
 core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
 
@@ -540,6 +559,8 @@ static int __init bL_switcher_init(void)
 		return -EINVAL;
 	}
 
+	register_cpu_notifier(&bL_switcher_hotplug_notifier);
+
 	if (!no_bL_switcher) {
 		ret = bL_switcher_enable();
 		if (ret)
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (10 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  2013-07-30 16:30   ` Lorenzo Pieralisi
  2013-07-23  3:31 ` [PATCH 13/13] ARM: bL_switcher: add a simple /dev user interface for debugging purposes Nicolas Pitre
  12 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

Up to now, the logical CPU was somehow tied to the physical CPU number
within a cluster.  This causes problems when forcing the boot CPU to be
different from the first enumerated CPU in the device tree, creating a
discrepancy between logical and physical CPU numbers.

Let's make the pairing completely independent from physical CPU numbers.

Let's keep only those logical CPUs with the same initial CPU cluster to
create a uniform scheduler profile without having to modify any of the
probed topology and compute capacity data.  This has the potential to
create a non-contiguous CPU numbering space when the switcher is active,
with potential impact on buggy user space tools.  It is however better to fix
those tools rather than making the switcher code more intrusive.
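
As an illustration (hypothetical enumeration, not taken from a real DT):
if the device tree interleaves the clusters so that logical CPUs 0 and 2
are in cluster 0 and logical CPUs 1 and 3 are in cluster 1, the pairing
pass keeps the cluster-0 CPUs and pairs them with cluster-1 CPUs, e.g.
0<->3 and 2<->1, leaving {0, 2} as the online set -- hence the possibly
non-contiguous numbering mentioned above.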

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c | 180 +++++++++++++++++++++++++-----------------
 1 file changed, 106 insertions(+), 74 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 2fe3911601..7bf2396cb0 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -54,21 +54,19 @@ static int read_mpidr(void)
 
 static void bL_do_switch(void *_unused)
 {
-	unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
+	unsigned ib_mpidr, ib_cpu, ib_cluster;
 
 	pr_debug("%s\n", __func__);
 
-	mpidr = read_mpidr();
-	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-	ob_cluster = clusterid;
-	ib_cluster = clusterid ^ 1;
+	ib_mpidr = cpu_logical_map(smp_processor_id());
+	ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+	ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
 
 	/*
 	 * Our state has been saved at this point.  Let's release our
 	 * inbound CPU.
 	 */
-	mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
+	mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
 	sev();
 
 	/*
@@ -114,6 +112,7 @@ static int bL_switchpoint(unsigned long _arg)
  */
 
 static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
 
 /*
  * bL_switch_to - Switch to a specific cluster for the current CPU
@@ -124,31 +123,38 @@ static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
  */
 static int bL_switch_to(unsigned int new_cluster_id)
 {
-	unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
+	unsigned int mpidr, this_cpu, that_cpu;
+	unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
 	struct tick_device *tdev;
 	enum clock_event_mode tdev_mode;
 	int ret;
 
-	mpidr = read_mpidr();
-	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-	ob_cluster = clusterid;
-	ib_cluster = clusterid ^ 1;
+	this_cpu = smp_processor_id();
+	ob_mpidr = read_mpidr();
+	ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+	ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+	BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
 
-	if (new_cluster_id == clusterid)
+	if (new_cluster_id == ob_cluster)
 		return 0;
 
-	pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
+	that_cpu = bL_switcher_cpu_pairing[this_cpu];
+	ib_mpidr = cpu_logical_map(that_cpu);
+	ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+	ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+	pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+		 this_cpu, ob_mpidr, ib_mpidr);
 
 	/* Close the gate for our entry vectors */
-	mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
-	mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
+	mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+	mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
 
 	/*
 	 * Let's wake up the inbound CPU now in case it requires some delay
 	 * to come online, but leave it gated in our entry vector code.
 	 */
-	ret = mcpm_cpu_power_up(cpuid, ib_cluster);
+	ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
 	if (ret) {
 		pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
 		return ret;
@@ -161,10 +167,8 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	local_irq_disable();
 	local_fiq_disable();
 
-	this_cpu = smp_processor_id();
-
 	/* redirect GIC's SGIs to our counterpart */
-	gic_migrate_target(bL_gic_id[cpuid][ib_cluster]);
+	gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
 
 	/*
 	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
@@ -187,11 +191,12 @@ static int bL_switch_to(unsigned int new_cluster_id)
 		panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
 
 	/*
-	 * Flip the cluster in the CPU logical map for this CPU.
+	 * Swap the physical CPUs in the logical map for this logical CPU.
 	 * This must be flushed to RAM as the resume code
 	 * needs to access it while the caches are still disabled.
 	 */
-	cpu_logical_map(this_cpu) ^= (1 << 8);
+	cpu_logical_map(this_cpu) = ib_mpidr;
+	cpu_logical_map(that_cpu) = ob_mpidr;
 	sync_cache_w(&cpu_logical_map(this_cpu));
 
 	/* Let's do the actual CPU switch. */
@@ -201,10 +206,8 @@ static int bL_switch_to(unsigned int new_cluster_id)
 
 	/* We are executing on the inbound CPU at this point */
 	mpidr = read_mpidr();
-	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-	pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid);
-	BUG_ON(clusterid != ib_cluster);
+	pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+	BUG_ON(mpidr != ib_mpidr);
 
 	mcpm_cpu_powered_up();
 
@@ -231,7 +234,7 @@ struct bL_thread {
 	struct completion started;
 };
 
-static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
+static struct bL_thread bL_threads[NR_CPUS];
 
 static int bL_switcher_thread(void *arg)
 {
@@ -284,7 +287,7 @@ int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
 {
 	struct bL_thread *t;
 
-	if (cpu >= MAX_CPUS_PER_CLUSTER) {
+	if (cpu >= ARRAY_SIZE(bL_threads)) {
 		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
 		return -EINVAL;
 	}
@@ -306,7 +309,7 @@ EXPORT_SYMBOL_GPL(bL_switch_request);
  */
 
 static unsigned int bL_switcher_active;
-static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER];
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
 static cpumask_t bL_switcher_removed_logical_cpus;
 
 static void bL_switcher_restore_cpus(void)
@@ -319,52 +322,86 @@ static void bL_switcher_restore_cpus(void)
 
 static int bL_switcher_halve_cpus(void)
 {
-	int cpu, cluster, i, ret;
-	cpumask_t cluster_mask[2], common_mask;
-
-	cpumask_clear(&bL_switcher_removed_logical_cpus);
-	cpumask_clear(&cluster_mask[0]);
-	cpumask_clear(&cluster_mask[1]);
+	int i, j, cluster_0, gic_id, ret;
+	unsigned int cpu, cluster, mask;
+	cpumask_t available_cpus;
 
+	/* First pass to validate what we have */
+	mask = 0;
 	for_each_online_cpu(i) {
-		cpu = cpu_logical_map(i) & 0xff;
-		cluster = (cpu_logical_map(i) >> 8) & 0xff;
+		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
 		if (cluster >= 2) {
 			pr_err("%s: only dual cluster systems are supported\n", __func__);
 			return -EINVAL;
 		}
-		cpumask_set_cpu(cpu, &cluster_mask[cluster]);
+		if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+			return -EINVAL;
+		mask |= (1 << cluster);
 	}
-
-	if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
-		pr_err("%s: no common set of CPUs\n", __func__);
+	if (mask != 3) {
+		pr_err("%s: no CPU pairing possible\n", __func__);
 		return -EINVAL;
 	}
 
-	for_each_online_cpu(i) {
-		cpu = cpu_logical_map(i) & 0xff;
-		cluster = (cpu_logical_map(i) >> 8) & 0xff;
-
-		if (cpumask_test_cpu(cpu, &common_mask)) {
-			/* Let's take note of the GIC ID for this CPU */
-			int gic_id = gic_get_cpu_id(i);
-			if (gic_id < 0) {
-				pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
-				return -EINVAL;
-			}
-			bL_gic_id[cpu][cluster] = gic_id;
-			pr_info("GIC ID for CPU %u cluster %u is %u\n",
-				cpu, cluster, gic_id);
-
+	/*
+	 * Now let's do the pairing.  We match each CPU with another CPU
+	 * from a different cluster.  To get a uniform scheduling behavior
+	 * without fiddling with CPU topology and compute capacity data,
+	 * we'll use logical CPUs initially belonging to the same cluster.
+	 */
+	memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+	cpumask_copy(&available_cpus, cpu_online_mask);
+	cluster_0 = -1;
+	for_each_cpu(i, &available_cpus) {
+		int match = -1;
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+		if (cluster_0 == -1)
+			cluster_0 = cluster;
+		if (cluster != cluster_0)
+			continue;
+		cpumask_clear_cpu(i, &available_cpus);
+		for_each_cpu(j, &available_cpus) {
+			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
 			/*
-			 * We keep only those logical CPUs which number
-			 * is equal to their physical CPU number. This is
-			 * not perfect but good enough for now.
+			 * Let's remember the last match to create "odd"
+			 * pairings on purpose in order for other code not
+			 * to assume any relation between physical and
+			 * logical CPU numbers.
 			 */
-			if (cpu == i) {
-				bL_switcher_cpu_original_cluster[cpu] = cluster;
-				continue;
-			}
+			if (cluster != cluster_0)
+				match = j;
+		}
+		if (match != -1) {
+			bL_switcher_cpu_pairing[i] = match;
+			cpumask_clear_cpu(match, &available_cpus);
+			pr_info("CPU%d paired with CPU%d\n", i, match);
+		}
+	}
+
+	/*
+	 * Now we disable the unwanted CPUs i.e. everything that has no
+	 * pairing information (that includes the pairing counterparts).
+	 */
+	cpumask_clear(&bL_switcher_removed_logical_cpus);
+	for_each_online_cpu(i) {
+		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+		/* Let's take note of the GIC ID for this CPU */
+		gic_id = gic_get_cpu_id(i);
+		if (gic_id < 0) {
+			pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+			bL_switcher_restore_cpus();
+			return -EINVAL;
+		}
+		bL_gic_id[cpu][cluster] = gic_id;
+		pr_info("GIC ID for CPU %u cluster %u is %u\n",
+			cpu, cluster, gic_id);
+
+		if (bL_switcher_cpu_pairing[i] != -1) {
+			bL_switcher_cpu_original_cluster[i] = cluster;
+			continue;
 		}
 
 		ret = cpu_down(i);
@@ -415,7 +452,7 @@ static int bL_switcher_enable(void)
 
 static void bL_switcher_disable(void)
 {
-	unsigned int cpu, cluster, i;
+	unsigned int cpu, cluster;
 	struct bL_thread *t;
 	struct task_struct *task;
 
@@ -435,7 +472,6 @@ static void bL_switcher_disable(void)
 	 * possibility for interference from external requests.
 	 */
 	for_each_online_cpu(cpu) {
-		BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff));
 		t = &bL_threads[cpu];
 		task = t->task;
 		t->task = NULL;
@@ -459,14 +495,10 @@ static void bL_switcher_disable(void)
 		/* If execution gets here, we're in trouble. */
 		pr_crit("%s: unable to restore original cluster for CPU %d\n",
 			__func__, cpu);
-		for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
-			if ((cpu_logical_map(i) & 0xff) != cpu)
-				continue;
-			pr_crit("%s: CPU %d can't be restored\n",
-				__func__, i);
-			cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus);
-			break;
-		}
+		pr_crit("%s: CPU %d can't be restored\n",
+			__func__, bL_switcher_cpu_pairing[cpu]);
+		cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+				  &bL_switcher_removed_logical_cpus);
 	}
 
 	bL_switcher_restore_cpus();
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 13/13] ARM: bL_switcher: add a simple /dev user interface for debugging purposes
  2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
                   ` (11 preceding siblings ...)
  2013-07-23  3:31 ` [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs Nicolas Pitre
@ 2013-07-23  3:31 ` Nicolas Pitre
  12 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-23  3:31 UTC (permalink / raw)
  To: linux-arm-kernel

Only the basic call to aid debugging.

*** NOT FOR PRODUCTION ***

Usage:

	echo <cpuid>,<clusterid> > /dev/b.L_switcher

where <cpuid> is the logical CPU number, and <clusterid> is 0 for the
first cluster and 1 for the second cluster.
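
The same request can of course be issued programmatically; a minimal
user-space sketch (illustrative only, error handling omitted) could be:

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/b.L_switcher", O_WRONLY);

		/* ask logical CPU 0 to switch to cluster 1 */
		write(fd, "0,1", 3);
		close(fd);
		return 0;
	}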

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/Kconfig                       |  8 ++++
 arch/arm/common/Makefile               |  1 +
 arch/arm/common/bL_switcher_dummy_if.c | 71 ++++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 arch/arm/common/bL_switcher_dummy_if.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2c9e5bf734..ccc5b39594 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1555,6 +1555,14 @@ config BL_SWITCHER
 	  transparently handle transition between a cluster of A15's
 	  and a cluster of A7's in a big.LITTLE system.
 
+config BL_SWITCHER_DUMMY_IF
+	tristate "Simple big.LITTLE switcher user interface"
+	depends on BL_SWITCHER && DEBUG_KERNEL
+	help
+	  This is a simple and dummy char dev interface to control
+	  the big.LITTLE switcher core code.  It is meant for
+	  debugging purposes only.
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 2586240d5a..5c8584c494 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -18,3 +18,4 @@ AFLAGS_mcpm_head.o		:= -march=armv7-a
 AFLAGS_vlock.o			:= -march=armv7-a
 obj-$(CONFIG_TI_PRIV_EDMA)	+= edma.o
 obj-$(CONFIG_BL_SWITCHER)	+= bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 0000000000..3f47f1203c
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by:	Nicolas Pitre, November 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * Dummy interface to user space for debugging purpose only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	unsigned char val[3];
+	unsigned int cpu, cluster;
+	int ret;
+
+	pr_debug("%s\n", __func__);
+
+	if (len < 3)
+		return -EINVAL;
+
+	if (copy_from_user(val, buf, 3))
+		return -EFAULT;
+
+	/* format: <cpu#>,<cluster#> */
+	if (val[0] < '0' || val[0] > '9' ||
+	    val[1] != ',' ||
+	    val[2] < '0' || val[2] > '1')
+		return -EINVAL;
+
+	cpu = val[0] - '0';
+	cluster = val[2] - '0';
+	ret = bL_switch_request(cpu, cluster);
+
+	return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+	.write		= bL_switcher_write,
+	.owner	= THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+	MISC_DYNAMIC_MINOR,
+	"b.L_switcher",
+	&bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+	return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+	misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-23  3:31 ` [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array Nicolas Pitre
@ 2013-07-24 16:47   ` Dave Martin
  2013-07-24 18:55     ` Nicolas Pitre
  2013-07-29 11:50   ` Lorenzo Pieralisi
  1 sibling, 1 reply; 43+ messages in thread
From: Dave Martin @ 2013-07-24 16:47 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 22, 2013 at 11:31:17PM -0400, Nicolas Pitre wrote:
> Currently we hash the MPIDR of the CPU being suspended to determine which
> entry in the sleep_save_sp array to use. In some situations, such as when
> we want to resume on another physical CPU, the MPIDR of another CPU should
> be used instead.
> 
> So let's use the value of cpu_logical_map(smp_processor_id()) in place
> of the MPIDR in the suspend path.  This will result in the same index
> being used as with the previous code unless the caller has modified
> cpu_logical_map() beforehand.

This only works because we happen to swap MPIDRs in cpu_logical_map
before we get here, so that cpu_logical_map(smp_processor_id())
described the post-resume situation for this logical CPU, not the
current situation.

We have to swizzle cpu_logical_map() somewhere, and the suspend path is
just as good as the resume path for that.  But this patch commits us to
requiring that on the suspend path specifically -- I think that ought to
be mentioned somewhere.  A comment in the preamble for __cpu_suspend
would be enough, I think.

Looks like the patch should work though, and it does remove the need to
read the MPIDR, which is slightly nicer for the non-switcher case.

Cheers
---Dave

> 
> The register allocation in __cpu_suspend is reworked in order to better
> accommodate the additional argument.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/kernel/sleep.S   | 25 +++++++++++--------------
>  arch/arm/kernel/suspend.c |  8 +++++---
>  2 files changed, 16 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
> index db1536b8b3..836d10e698 100644
> --- a/arch/arm/kernel/sleep.S
> +++ b/arch/arm/kernel/sleep.S
> @@ -55,6 +55,7 @@
>   * specific registers and some other data for resume.
>   *  r0 = suspend function arg0
>   *  r1 = suspend function
> + *  r2 = MPIDR value used to index into sleep_save_sp
>   */
>  ENTRY(__cpu_suspend)
>  	stmfd	sp!, {r4 - r11, lr}
> @@ -67,23 +68,19 @@ ENTRY(__cpu_suspend)
>  	mov	r5, sp			@ current virtual SP
>  	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
>  	sub	sp, sp, r4		@ allocate CPU state on stack
> -	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
> -	add	r0, sp, #8		@ save pointer to save block
> -	mov	r1, r4			@ size of save block
> -	mov	r2, r5			@ virtual SP
>  	ldr	r3, =sleep_save_sp
> +	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
>  	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
> -	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
> -        ALT_UP_B(1f)
> -	ldr	r8, =mpidr_hash
> -	/*
> -	 * This ldmia relies on the memory layout of the mpidr_hash
> -	 * struct mpidr_hash.
> -	 */
> -	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
> -	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
> -	add	r3, r3, lr, lsl #2
> +	ALT_SMP(ldr r0, =mpidr_hash)
> +       ALT_UP_B(1f)
> +	/* This ldmia relies on the memory layout of the mpidr_hash struct */
> +	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
> +	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
> +	add	r3, r3, r0, lsl #2
>  1:
> +	mov	r2, r5			@ virtual SP
> +	mov	r1, r4			@ size of save block
> +	add	r0, sp, #8		@ pointer to save block
>  	bl	__cpu_suspend_save
>  	adr	lr, BSYM(cpu_suspend_abort)
>  	ldmfd	sp!, {r0, pc}		@ call suspend fn
> diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> index 41cf3cbf75..2835d35234 100644
> --- a/arch/arm/kernel/suspend.c
> +++ b/arch/arm/kernel/suspend.c
> @@ -10,7 +10,7 @@
>  #include <asm/suspend.h>
>  #include <asm/tlbflush.h>
>  
> -extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
> +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
>  extern void cpu_resume_mmu(void);
>  
>  #ifdef CONFIG_MMU
> @@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void);
>  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  {
>  	struct mm_struct *mm = current->active_mm;
> +	u32 __mpidr = cpu_logical_map(smp_processor_id());
>  	int ret;
>  
>  	if (!idmap_pgd)
> @@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  	 * resume (indicated by a zero return code), we need to switch
>  	 * back to the correct page tables.
>  	 */
> -	ret = __cpu_suspend(arg, fn);
> +	ret = __cpu_suspend(arg, fn, __mpidr);
>  	if (ret == 0) {
>  		cpu_switch_mm(mm->pgd, mm);
>  		local_flush_bp_all();
> @@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  #else
>  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  {
> -	return __cpu_suspend(arg, fn);
> +	u32 __mpidr = cpu_logical_map(smp_processor_id());
> +	return __cpu_suspend(arg, fn, __mpidr);
>  }
>  #define	idmap_pgd	NULL
>  #endif
> -- 
> 1.8.1.2
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-24 16:47   ` Dave Martin
@ 2013-07-24 18:55     ` Nicolas Pitre
  2013-07-25 10:55       ` Dave Martin
  0 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-24 18:55 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 24 Jul 2013, Dave Martin wrote:

> On Mon, Jul 22, 2013 at 11:31:17PM -0400, Nicolas Pitre wrote:
> > Currently we hash the MPIDR of the CPU being suspended to determine which
> > entry in the sleep_save_sp array to use. In some situations, such as when
> > we want to resume on another physical CPU, the MPIDR of another CPU should
> > be used instead.
> > 
> > So let's use the value of cpu_logical_map(smp_processor_id()) in place
> > of the MPIDR in the suspend path.  This will result in the same index
> > being used as with the previous code unless the caller has modified
> > cpu_logical_map() beforehand.
> 
> This only works because we happen to swap MPIDRs in cpu_logical_map
> before we get here, so that cpu_logical_map(smp_processor_id())
> described the post-resume situation for this logical CPU, not the
> current situation.

Yes, that's more or less what I said above.  Maybe that should be 
clarified further?

> We have to swizzle cpu_logical_map() somewhere, and the suspend path is
> just as good as the resume path for that.

Not quite.  On the resume path, we have to get to the context save array 
while having no mmu active and no stack.  There is no way we could take 
over another CPU's context in those conditions without making it 
extremely painful on ourselves.  It is therefore way easier to swizzle 
cpu_logical_map() on the suspend path and keep the suspend/resume logic 
unmodified.

> But this patch commits us to requiring that on the suspend path 
> specifically -- I think that ought to be mentioned somewhere. A 
> comment in the preamble for __cpu_suspend would be enough, I think.

What comment would you suggest?  I want to make sure the possible 
confusion you see is properly addressed.

> Looks like the patch should work though, and it does remove the need to
> read the MPIDR, which is slightly nicer for the non-switcher case.

This patch works fine here, and I'd expect rather catastrophic results 
if it didn't.  ;-)


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-24 18:55     ` Nicolas Pitre
@ 2013-07-25 10:55       ` Dave Martin
  2013-07-25 16:06         ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Dave Martin @ 2013-07-25 10:55 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> On Wed, 24 Jul 2013, Dave Martin wrote:
> 
> > On Mon, Jul 22, 2013 at 11:31:17PM -0400, Nicolas Pitre wrote:
> > > Currently we hash the MPIDR of the CPU being suspended to determine which
> > > entry in the sleep_save_sp array to use. In some situations, such as when
> > > we want to resume on another physical CPU, the MPIDR of another CPU should
> > > be used instead.
> > > 
> > > So let's use the value of cpu_logical_map(smp_processor_id()) in place
> > > of the MPIDR in the suspend path.  This will result in the same index
> > > being used as with the previous code unless the caller has modified
> > > cpu_logical_map() beforehand.
> > 
> > This only works because we happen to swap MPIDRs in cpu_logical_map
> > before we get here, so that cpu_logical_map(smp_processor_id())
> > described the post-resume situation for this logical CPU, not the
> > current situation.
> 
> Yes, that's more or less what I said above.  Maybe that should be 
> clarified further?
> 
> > We have to swizzle cpu_logical_map() somewhere, and the suspend path is
> > just as good as the resume path for that.
> 
> Not quite.  On the resume path, we have to get to the context save array 
> while having no mmu active and no stack.  There is no way we could take 
> over another CPU's context in those conditions without making it 
> extremely painful on ourselves.  It is therefore way easier to swizzle 
> cpu_logical_map() on the suspend path and keep the suspend/resume logic 
> unmodified.

For this implementation, that's correct, and it's a perfectly good way
of doing things.

My point is that the user of cpu_resume() does not necessarily realise
the nature of this dependency.

> > But this patch commits us to requiring that on the suspend path 
> > specifically -- I think that ought to be mentioned somewhere. A 
> > comment in the preamble for __cpu_suspend would be enough, I think.
> 
> What comment would you suggest?  I want to make sure the possible 
> confusion you see is properly addressed.

I think we just need to state that the value of
cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
CPU the suspending logical CPU will resume on.  Consequently, if doing a
migration, cpu_logical_map() must be updated appropriately somewhere
between cpu_pm_enter() and cpu_suspend().
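
In code terms that boils down to an ordering like this on the suspend path
(sketch only, mirroring what the switcher does in patch 12; target_mpidr
and shutdown_fn are placeholder names):

	ret = cpu_pm_enter();
	...
	/* resume on the physical CPU identified by target_mpidr */
	cpu_logical_map(this_cpu) = target_mpidr;
	sync_cache_w(&cpu_logical_map(this_cpu));
	ret = cpu_suspend(0, shutdown_fn);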


This also means that the choice of destination CPU can't be made
dynamically at resume time.  But I'm not sure that's useful or even
makes sense, so I'm not worrying about it.

> > Looks like the patch should work though, and it does remove the need to
> > read the MPIDR, which is slightly nicer for the non-switcher case.
> 
> This patch works fine here, and I'd expect rather catastrophic results 
> if it didn't.  ;-)

Sure, I'm not doubting that the code works.

Cheers
---Dave

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 02/13] ARM: gic: add CPU migration support
  2013-07-23  3:31 ` [PATCH 02/13] ARM: gic: add CPU migration support Nicolas Pitre
@ 2013-07-25 11:44   ` Jonathan Austin
  2013-07-25 19:11     ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Jonathan Austin @ 2013-07-25 11:44 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Nico,

I've got a couple of minor questions/comments about this one...

On 23/07/13 04:31, Nicolas Pitre wrote:
> This is required by the big.LITTLE switcher code.
>
> The gic_migrate_target() changes the CPU interface mapping for the
> current CPU to redirect SGIs to the specified interface, and it also
> updates the target CPU for each interrupts to that CPU interface
> if they were targeting the current interface.  Finally, pending
> SGIs for the current CPU are forwarded to the new interface.
>
> Because Linux does not use it, the SGI source information for the
> forwarded SGIs is not preserved.  Neither is the source information
> for the SGIs sent by the current CPU to other CPUs adjusted to match
> the new CPU interface mapping.  The required registers are banked so
> only the target CPU could do it.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>   drivers/irqchip/irq-gic.c       | 81 +++++++++++++++++++++++++++++++++++++++--
>   include/linux/irqchip/arm-gic.h |  4 ++
>   2 files changed, 82 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index ee7c503120..6bd5a8c1aa 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
>   	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
>   		return -EINVAL;
>
> +	raw_spin_lock(&irq_controller_lock);
>   	mask = 0xff << shift;
>   	bit = gic_cpu_map[cpu] << shift;
> -
> -	raw_spin_lock(&irq_controller_lock);
>   	val = readl_relaxed(reg) & ~mask;
>   	writel_relaxed(val | bit, reg);
>   	raw_spin_unlock(&irq_controller_lock);
> @@ -646,7 +645,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
>   void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
>   {
>   	int cpu;
> -	unsigned long map = 0;
> +	unsigned long flags, map = 0;
> +
> +	raw_spin_lock_irqsave(&irq_controller_lock, flags);
>
>   	/* Convert our logical CPU mask into a physical one. */
>   	for_each_cpu(cpu, mask)
> @@ -660,6 +661,80 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
>
>   	/* this always happens on GIC0 */
>   	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
> +
> +	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
> +}
> +#endif
> +
> +#ifdef CONFIG_BL_SWITCHER
> +/*
> + * gic_migrate_target - migrate IRQs to another PU interface

(nit) Should that be _C_PU?

> + *
> + * @new_cpu_id: the CPU target ID to migrate IRQs to
> + *
> + * Migrate all peripheral interrupts with a target matching the current CPU
> + * to the interface corresponding to @new_cpu_id.  The CPU interface mapping
> + * is also updated.  Targets to other CPU interfaces are unchanged.
> + * This must be called with IRQs locally disabled.
> + */
> +void gic_migrate_target(unsigned int new_cpu_id)
> +{
> +	unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
> +	void __iomem *dist_base;
> +	int i, ror_val, cpu = smp_processor_id();
> +	u32 val, old_mask, active_mask;
> +
> +	if (gic_nr >= MAX_GIC_NR)
> +		BUG();
> +
> +	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
> +	if (!dist_base)
> +		return;
> +	gic_irqs = gic_data[gic_nr].gic_irqs;
> +
> +	old_cpu_id = __ffs(gic_cpu_map[cpu]);
> +	old_mask = 0x01010101 << old_cpu_id;

I don't think this is very clear, though section 4.3.12 of the GIC spec 
helps a lot! A little pointer or some more self-evident name for the 
mask might be nice...

old_mask = GIC_ITARGETSR_MASK << old_cpu_id

or similar? This at least points one to the right bit of documentation?

> +	ror_val = (old_cpu_id - new_cpu_id) & 31;
> +
> +	raw_spin_lock(&irq_controller_lock);
> +
> +	gic_cpu_map[cpu] = 1 << new_cpu_id;
> +
> +	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {

Does this '8' here warrant a #define? GIC_RO_INTR_REGS?

Perhaps everyone looking at the code will be familiar enough with the 
GIC to see immediately why the first 8 entries are being skipped...?

> +		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
> +		active_mask = val & old_mask;
> +		if (active_mask) {
> +			val &= ~active_mask;
> +			val |= ror32(active_mask, ror_val);
> +			writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4);
> +		}
> +	}
> +
> +	raw_spin_unlock(&irq_controller_lock);
> +
> +	/*
> +	 * Now let's migrate and clear any potential SGIs that might be
> +	 * pending for us (old_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
> +	 * is a banked register, we can only forward the SGI using
> +	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
> +	 * doesn't use that information anyway.
> +	 *
> +	 * For the same reason we do not adjust SGI source information
> +	 * for previously sent SGIs by us to other CPUs either.
> +	 */
> +	for (i = 0; i < 16; i += 4) {
> +		int j;
> +		val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
> +		if (!val)
> +			continue;
> +		writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
> +		for (j = i; j < i + 4; j++) {
> +			if (val & 0xff)
> +				writel_relaxed((1 << (new_cpu_id + 16)) | j,
> +						dist_base + GIC_DIST_SOFTINT);
> +			val >>= 8;
> +		}
> +	}

I'm curious as to why we don't need to do anything for PPIs here - could 
there be any pending PPIs that don't get handled (I guess retargetting 
doesn't make sense for these?)

>   }
>   #endif
>
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index 3e203eb23c..40bfcac959 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -31,6 +31,8 @@
>   #define GIC_DIST_TARGET			0x800
>   #define GIC_DIST_CONFIG			0xc00
>   #define GIC_DIST_SOFTINT		0xf00
> +#define GIC_DIST_SGI_PENDING_CLEAR	0xf10
> +#define GIC_DIST_SGI_PENDING_SET	0xf20
>
>   #define GICH_HCR			0x0
>   #define GICH_VTR			0x4
> @@ -73,6 +75,8 @@ static inline void gic_init(unsigned int nr, int start,
>   	gic_init_bases(nr, start, dist, cpu, 0, NULL);
>   }
>
> +void gic_migrate_target(unsigned int new_cpu_id);
> +
>   #endif /* __ASSEMBLY */
>
>   #endif
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-25 10:55       ` Dave Martin
@ 2013-07-25 16:06         ` Nicolas Pitre
  2013-07-26 11:31           ` Lorenzo Pieralisi
  0 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 25 Jul 2013, Dave Martin wrote:

> On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > On Wed, 24 Jul 2013, Dave Martin wrote:
> > 
> > > But this patch commits us to requiring that on the suspend path 
> > > specifically -- I think that ought to be mentioned somewhere. A 
> > > comment in the preamble for __cpu_suspend would be enough, I think.
> > 
> > What comment would you suggest?  I want to make sure the possible 
> > confusion you see is properly addressed.
> 
> I think we just need to state that the value of
> cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> CPU the suspending logical CPU will resume on.  Consequently, if doing a
> migration, cpu_logical_map() must be updated appropriately somewhere
> between cpu_pm_enter() and cpu_suspend().

Excellent.  I've amended the patch with this:

diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index 2835d35234..caf938db62 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
 /*
  * Hide the first two arguments to __cpu_suspend - these are an implementation
  * detail which platform code shouldn't have to know about.
+ *
+ * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
+ * the MPIDR of the physical CPU the suspending logical CPU will resume on.
+ * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
+ * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
  */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {

I've put it against cpu_suspend() rather than __cpu_suspend() as this 
is what users should care about.

ACK?


Nicolas

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-23  3:31 ` [PATCH 03/13] ARM: b.L: core switcher code Nicolas Pitre
@ 2013-07-25 17:15   ` Jonathan Austin
  2013-07-25 20:20     ` Nicolas Pitre
  2013-07-26 10:45   ` Lorenzo Pieralisi
  2013-07-26 14:53   ` Russell King - ARM Linux
  2 siblings, 1 reply; 43+ messages in thread
From: Jonathan Austin @ 2013-07-25 17:15 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Nico,

I've got a few more questions here...

On 23/07/13 04:31, Nicolas Pitre wrote:
> This is the core code implementing big.LITTLE switcher functionality.
> Rational for this code is available here:
>
> http://lwn.net/Articles/481055/
>
> The main entry point for a switch request is:
>
> void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
>
> If the calling CPU is not the wanted one, this wrapper takes care of
> sending the request to the appropriate CPU with schedule_work_on().
>
> At the moment the core switch operation is handled by bL_switch_to()
> which must be called on the CPU for which a switch is requested.
>
> What this code does:
>
>    * Return early if the current cluster is the wanted one.
>
>    * Close the gate in the kernel entry vector for both the inbound
>      and outbound CPUs.
>
>    * Wake up the inbound CPU so it can perform its reset sequence in
>      parallel up to the kernel entry vector gate.
>
>    * Migrate all interrupts in the GIC targeting the outbound CPU
>      interface to the inbound CPU interface, including SGIs. This is
>      performed by gic_migrate_target() in drivers/irqchip/irq-gic.c.
>
>    * Call cpu_pm_enter() which takes care of flushing the VFP state to
>      RAM and save the CPU interface config from the GIC to RAM.
>
>    * Modify the cpu_logical_map to refer to the inbound physical CPU.
>
>    * Call cpu_suspend() which saves the CPU state (general purpose
>      registers, page table address) onto the stack and store the
>      resulting stack pointer in an array indexed by the updated
>      cpu_logical_map, then call the provided shutdown function.
>      This happens in arch/arm/kernel/sleep.S.
>
> At this point, the provided shutdown function executed by the outbound
> CPU ungates the inbound CPU. Therefore the inbound CPU:
>
>    * Picks up the saved stack pointer in the array indexed by its MPIDR
>      in arch/arm/kernel/sleep.S.
>
>    * The MMU and caches are re-enabled using the saved state on the
>      provided stack, just like if this was a resume operation from a
>      suspended state.
>
>    * Then cpu_suspend() returns, although this is on the inbound CPU
>      rather than the outbound CPU which called it initially.
>
>    * The function cpu_pm_exit() is called which effect is to restore the
>      CPU interface state in the GIC using the state previously saved by
>      the outbound CPU.
>
>    * Exit of bL_switch_to() to resume normal kernel execution on the
>      new CPU.
>
> However, the outbound CPU is potentially still running in parallel while
> the inbound CPU is resuming normal kernel execution, hence we need
> per CPU stack isolation to execute bL_do_switch().  After the outbound
> CPU has ungated the inbound CPU, it calls mcpm_cpu_power_down() to:
>
>    * Clean its L1 cache.
>
>    * If it is the last CPU still alive in its cluster (last man standing),
>      it also cleans its L2 cache and disables cache snooping from the other
>      cluster.
>
>    * Power down the CPU (or whole cluster).
>
> Code called from bL_do_switch() might end up referencing 'current' for
> some reasons.  However, 'current' is derived from the stack pointer.
> With any arbitrary stack, the returned value for 'current' and any
> dereferenced values through it are just random garbage which may lead to
> segmentation faults.
>
> The active page table during the execution of bL_do_switch() is also a
> problem.  There is no guarantee that the inbound CPU won't destroy the
> corresponding task which would free the attached page table while the
> outbound CPU is still running and relying on it.
>
> To solve both issues, we borrow some of the task space belonging to
> the init/idle task which, by its nature, is lightly used and therefore
> is unlikely to clash with our usage.  The init task is also never going
> away.

I had written a comment here about this looking hairy, and asking 
whether you'd considered using dedicated threads for this.

As I hit the later patches in the series I see that's exactly what 
you've done. Is it really worth keeping the historical solution in here? 
Could build/bisectability be maintained by moving the Kconfig options 
later in the series, but without requiring the weird init/idle hack to 
be kept around?

>
> Right now the logical CPU number is assumed to be equivalent to the
> physical CPU number within each cluster. The kernel should also be
> booted with only one cluster active.  These limitations will be lifted
> eventually.
>

I just read this aloud in the office and Dave Martin mentioned that the 
MPIDR hashing schemes make this obsolete now?

> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>   arch/arm/Kconfig                   |  17 +++
>   arch/arm/common/Makefile           |   1 +
>   arch/arm/common/bL_switcher.c      | 247 +++++++++++++++++++++++++++++++++++++
>   arch/arm/include/asm/bL_switcher.h |  17 +++
>   4 files changed, 282 insertions(+)
>   create mode 100644 arch/arm/common/bL_switcher.c
>   create mode 100644 arch/arm/include/asm/bL_switcher.h
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index ba412e02ec..2c9e5bf734 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -1538,6 +1538,23 @@ config MCPM
>            for (multi-)cluster based systems, such as big.LITTLE based
>            systems.
>
> +config BIG_LITTLE
> +       bool "big.LITTLE support (Experimental)"
> +       depends on CPU_V7 && SMP
> +       select MCPM
> +       help
> +         This option enables support for the big.LITTLE architecture.

I'm not sure we should describe this as an architecture?

"This option enables support for big.LITTLE processing"?

What are the plans for/implications of having CONFIG_BIG_LITTLE and 
!CONFIG_BL_SWITCHER?

> +
> +config BL_SWITCHER
> +       bool "big.LITTLE switcher support"
> +       depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
> +       select CPU_PM
> +       select ARM_CPU_SUSPEND
> +       help
> +         The big.LITTLE "switcher" provides the core functionality to
> +         transparently handle transition between a cluster of A15's
> +         and a cluster of A7's in a big.LITTLE system.
> +
>   choice
>          prompt "Memory split"
>          default VMSPLIT_3G
> diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
> index 8c60f473e9..2586240d5a 100644
> --- a/arch/arm/common/Makefile
> +++ b/arch/arm/common/Makefile
> @@ -17,3 +17,4 @@ obj-$(CONFIG_MCPM)            += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
>   AFLAGS_mcpm_head.o             := -march=armv7-a
>   AFLAGS_vlock.o                 := -march=armv7-a
>   obj-$(CONFIG_TI_PRIV_EDMA)     += edma.o
> +obj-$(CONFIG_BL_SWITCHER)      += bL_switcher.o
> diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> new file mode 100644
> index 0000000000..e63881b430
> --- /dev/null
> +++ b/arch/arm/common/bL_switcher.c
> @@ -0,0 +1,247 @@
> +/*
> + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
> + *
> + * Created by: Nicolas Pitre, March 2012
> + * Copyright:  (C) 2012-2013  Linaro Limited
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/sched.h>
> +#include <linux/interrupt.h>
> +#include <linux/cpu_pm.h>
> +#include <linux/workqueue.h>
> +#include <linux/mm.h>
> +#include <linux/string.h>
> +#include <linux/irqchip/arm-gic.h>
> +
> +#include <asm/smp_plat.h>
> +#include <asm/cacheflush.h>
> +#include <asm/suspend.h>
> +#include <asm/mcpm.h>
> +#include <asm/bL_switcher.h>
> +
> +
> +/*
> + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
> + * __attribute_const__ and we don't want the compiler to assume any
> + * constness here as the value _does_ change along some code paths.
> + */
> +
> +static int read_mpidr(void)
> +{
> +       unsigned int id;
> +       asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));

Is the \t in here an artefact of our mail servers? I notice that in 
other places, for example arch/arm/include/asm/cp15.h, we don't have 
that formatting.

> +       return id & MPIDR_HWID_BITMASK;
> +}
> +
> +/*
> + * bL switcher core code.
> + */
> +
> +static void bL_do_switch(void *_unused)
> +{
> +       unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
> +
> +       /*
> +        * We now have a piece of stack borrowed from the init task's.
> +        * Let's also switch to init_mm right away to match it.
> +        */
> +       cpu_switch_mm(init_mm.pgd, &init_mm);
> +
> +       pr_debug("%s\n", __func__);

This looks like fairly uninformative debug - is it left here 
intentionally? If so, isn't there a better message to print than just 
the function name?
> +
> +       mpidr = read_mpidr();
> +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       ob_cluster = clusterid;
> +       ib_cluster = clusterid ^ 1;
> +
> +       /*
> +        * Our state has been saved at this point.  Let's release our
> +        * inbound CPU.
> +        */
> +       mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
> +       sev();
> +
> +       /*
> +        * From this point, we must assume that our counterpart CPU might
> +        * have taken over in its parallel world already, as if execution
> +        * just returned from cpu_suspend().  It is therefore important to
> +        * be very careful not to make any change the other guy is not
> +        * expecting.  This is why we need stack isolation.
> +        *
> +        * Fancy under cover tasks could be performed here.  For now
> +        * we have none.
> +        */
> +
> +       /* Let's put ourself down. */
> +       mcpm_cpu_power_down();
> +
> +       /* should never get here */
> +       BUG();
> +}
> +
> +/*
> + * Stack isolation.  To ensure 'current' remains valid, we just borrow
> + * a slice of the init/idle task which should be fairly lightly used.
> + * The borrowed area starts just above the thread_info structure located
> + * at the very bottom of the stack, aligned to a cache line.
> + */
> +#define STACK_SIZE 256
> +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
> +static int bL_switchpoint(unsigned long _arg)
> +{
> +       unsigned int mpidr = read_mpidr();
> +       unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER;
> +       void *stack = &init_thread_info + 1;
> +       stack = PTR_ALIGN(stack, L1_CACHE_BYTES);

Are there any implications if there were big.LITTLE cores with different 
cache line sizes? The LWN article that you reference mentions different 
cache topologies...

> +       stack += cpu_index * STACK_SIZE + STACK_SIZE;
> +       call_with_stack(bL_do_switch, (void *)_arg, stack);
> +       BUG();
> +}
> +
> +/*
> + * Generic switcher interface
> + */
> +
> +/*
> + * bL_switch_to - Switch to a specific cluster for the current CPU
> + * @new_cluster_id: the ID of the cluster to switch to.
> + *
> + * This function must be called on the CPU to be switched.
> + * Returns 0 on success, else a negative status code.
> + */
> +static int bL_switch_to(unsigned int new_cluster_id)
> +{
> +       unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
> +       int ret;
> +
> +       mpidr = read_mpidr();
> +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       ob_cluster = clusterid;
> +       ib_cluster = clusterid ^ 1;
> +
> +       if (new_cluster_id == clusterid)
> +               return 0;
> +
> +       pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
> +
> +       /* Close the gate for our entry vectors */
> +       mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
> +       mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
> +
> +       /*
> +        * Let's wake up the inbound CPU now in case it requires some delay
> +        * to come online, but leave it gated in our entry vector code.
> +        */
> +       ret = mcpm_cpu_power_up(cpuid, ib_cluster);
> +       if (ret) {
> +               pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
> +               return ret;
> +       }
> +
> +       /*
> +        * From this point we are entering the switch critical zone
> +        * and can't sleep/schedule anymore.
> +        */

Will this function ever be called from anywhere other than 
bL_switch_request? As far as I could see, bL_switch_request uses two 
different methods of ensuring that we will still be on the same CPU 
throughout this function (if called directly the get_cpu and put_cpu 
dis/enable pre-emption, and via the work queue we're guaranteed the same 
logical cpu). The different approaches somewhat obfuscate the need for 
something to have been done even before this point to stop us getting 
migrated to another CPU, I think...

> +       local_irq_disable();
> +       local_fiq_disable();
> +
> +       this_cpu = smp_processor_id();
> +
> +       /* redirect GIC's SGIs to our counterpart */
> +       gic_migrate_target(cpuid + ib_cluster*4);
> +
> +       /*
> +        * Raise a SGI on the inbound CPU to make sure it doesn't stall
> +        * in a possible WFI, such as in mcpm_power_down().
> +        */
> +       arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
> +
> +       ret = cpu_pm_enter();
> +
> +       /* we can not tolerate errors at this point */
> +       if (ret)
> +               panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
> +
> +       /*
> +        * Flip the cluster in the CPU logical map for this CPU.
> +        * This must be flushed to RAM as the resume code
> +        * needs to access it while the caches are still disabled.
> +        */
> +       cpu_logical_map(this_cpu) ^= (1 << 8);
> +       sync_cache_w(&cpu_logical_map(this_cpu));
> +
> +       /* Let's do the actual CPU switch. */
> +       ret = cpu_suspend(0, bL_switchpoint);
> +       if (ret > 0)
> +               panic("%s: cpu_suspend() returned %d\n", __func__, ret);
> +
> +       /* We are executing on the inbound CPU at this point */

Very cool :)

> +       mpidr = read_mpidr();
> +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid);

Do we want a 'switcher' prefix on these? "bL_switcher: after switch..." etc?

> +       BUG_ON(clusterid != ib_cluster);
> +
> +       mcpm_cpu_powered_up();
> +
> +       ret = cpu_pm_exit();
> +
> +       local_fiq_enable();
> +       local_irq_enable();
> +
> +       if (ret)
> +               pr_err("%s exiting with error %d\n", __func__, ret);
> +       return ret;
> +}
> +
> +struct switch_args {
> +       unsigned int cluster;
> +       struct work_struct work;
> +};
> +
> +static void __bL_switch_to(struct work_struct *work)
> +{
> +       struct switch_args *args = container_of(work, struct switch_args, work);
> +       bL_switch_to(args->cluster);
> +}
> +
> +/*
> + * bL_switch_request - Switch to a specific cluster for the given CPU
> + *
> + * @cpu: the CPU to switch
> + * @new_cluster_id: the ID of the cluster to switch to.
> + *
> + * This function causes a cluster switch on the given CPU.  If the given
> + * CPU is the same as the calling CPU then the switch happens right away.
> + * Otherwise the request is put on a work queue to be scheduled on the
> + * remote CPU.
> + */
> +void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> +{
> +       unsigned int this_cpu = get_cpu();
> +       struct switch_args args;
> +
> +       if (cpu == this_cpu) {
> +               bL_switch_to(new_cluster_id);
> +               put_cpu();
> +               return;
> +       }
> +       put_cpu();
> +
> +       args.cluster = new_cluster_id;
> +       INIT_WORK_ONSTACK(&args.work, __bL_switch_to);
> +       schedule_work_on(cpu, &args.work);

The comment at the top of schedule_work_on() says:
  [...]
  * We queue the work to a specific CPU, the caller must ensure it
  * can't go away.
  [...]

I see we don't disable cpu hotplug requests until later in the series - 
does that matter? Until that patch, it seems that we could hotplug off 
the core that we'd scheduled the work onto?


> +       flush_work(&args.work);
> +}
> +EXPORT_SYMBOL_GPL(bL_switch_request);
> diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
> new file mode 100644
> index 0000000000..72efe3f349
> --- /dev/null
> +++ b/arch/arm/include/asm/bL_switcher.h
> @@ -0,0 +1,17 @@
> +/*
> + * arch/arm/include/asm/bL_switcher.h
> + *
> + * Created by:  Nicolas Pitre, April 2012
> + * Copyright:   (C) 2012-2013  Linaro Limited
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef ASM_BL_SWITCHER_H
> +#define ASM_BL_SWITCHER_H
> +
> +void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
> +
> +#endif
> --
> 1.8.1.2
>
Hope some of those questions are helpful,

Jonny

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 02/13] ARM: gic: add CPU migration support
  2013-07-25 11:44   ` Jonathan Austin
@ 2013-07-25 19:11     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-25 19:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 25 Jul 2013, Jonathan Austin wrote:

> Hi Nico,
> 
> I've got a couple of minor questions/comments about this one...
> 
> On 23/07/13 04:31, Nicolas Pitre wrote:
> > This is required by the big.LITTLE switcher code.
> > 
> > The gic_migrate_target() changes the CPU interface mapping for the
> > current CPU to redirect SGIs to the specified interface, and it also
> > updates the target CPU for each interrupts to that CPU interface
> > if they were targeting the current interface.  Finally, pending
> > SGIs for the current CPU are forwarded to the new interface.
> > 
> > Because Linux does not use it, the SGI source information for the
> > forwarded SGIs is not preserved.  Neither is the source information
> > for the SGIs sent by the current CPU to other CPUs adjusted to match
> > the new CPU interface mapping.  The required registers are banked so
> > only the target CPU could do it.
> > 
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> >   drivers/irqchip/irq-gic.c       | 81
> > +++++++++++++++++++++++++++++++++++++++--
> >   include/linux/irqchip/arm-gic.h |  4 ++
> >   2 files changed, 82 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> > index ee7c503120..6bd5a8c1aa 100644
> > --- a/drivers/irqchip/irq-gic.c
> > +++ b/drivers/irqchip/irq-gic.c
> > @@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const
> > struct cpumask *mask_val,
> >   	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
> >   		return -EINVAL;
> > 
> > +	raw_spin_lock(&irq_controller_lock);
> >   	mask = 0xff << shift;
> >   	bit = gic_cpu_map[cpu] << shift;
> > -
> > -	raw_spin_lock(&irq_controller_lock);
> >   	val = readl_relaxed(reg) & ~mask;
> >   	writel_relaxed(val | bit, reg);
> >   	raw_spin_unlock(&irq_controller_lock);
> > @@ -646,7 +645,9 @@ static void __init gic_pm_init(struct gic_chip_data
> > *gic)
> >   void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
> >   {
> >   	int cpu;
> > -	unsigned long map = 0;
> > +	unsigned long flags, map = 0;
> > +
> > +	raw_spin_lock_irqsave(&irq_controller_lock, flags);
> > 
> >   	/* Convert our logical CPU mask into a physical one. */
> >   	for_each_cpu(cpu, mask)
> > @@ -660,6 +661,80 @@ void gic_raise_softirq(const struct cpumask *mask,
> > unsigned int irq)
> > 
> >   	/* this always happens on GIC0 */
> >   	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) +
> > GIC_DIST_SOFTINT);
> > +
> > +	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
> > +}
> > +#endif
> > +
> > +#ifdef CONFIG_BL_SWITCHER
> > +/*
> > + * gic_migrate_target - migrate IRQs to another PU interface
> 
> (nit) Should that be _C_PU?

Obviously.

> > + *
> > + * @new_cpu_id: the CPU target ID to migrate IRQs to
> > + *
> > + * Migrate all peripheral interrupts with a target matching the current CPU
> > + * to the interface corresponding to @new_cpu_id.  The CPU interface
> > mapping
> > + * is also updated.  Targets to other CPU interfaces are unchanged.
> > + * This must be called with IRQs locally disabled.
> > + */
> > +void gic_migrate_target(unsigned int new_cpu_id)
> > +{
> > +	unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
> > +	void __iomem *dist_base;
> > +	int i, ror_val, cpu = smp_processor_id();
> > +	u32 val, old_mask, active_mask;
> > +
> > +	if (gic_nr >= MAX_GIC_NR)
> > +		BUG();
> > +
> > +	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
> > +	if (!dist_base)
> > +		return;
> > +	gic_irqs = gic_data[gic_nr].gic_irqs;
> > +
> > +	old_cpu_id = __ffs(gic_cpu_map[cpu]);
> > +	old_mask = 0x01010101 << old_cpu_id;
> 
> I don't think this is very clear, though section 4.3.12 of the GIC spec helps
> a lot! A little pointer or some more self-evident name for the mask might be
> nice...
> 
> old_mask = GIC_ITARGETSR_MASK << old_cpu_id
> 
> or similar? This at least points one to the right bit of documentation?

I've renamed old_cpu_id to cur_cpu_id and old_mask to cur_target_mask.
Also added a comment later on.

> > +	ror_val = (old_cpu_id - new_cpu_id) & 31;
> > +
> > +	raw_spin_lock(&irq_controller_lock);
> > +
> > +	gic_cpu_map[cpu] = 1 << new_cpu_id;
> > +
> > +	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
> 
> Does this '8' here warrant a #define? GIC_RO_INTR_REGS?
> 
> Perhaps everyone looking at the code will be familiar enough with the GIC to
> see immediately why the first 8 entries are being skipped...?

I've added a comment right before this loop explaining what is being 
done, mentioning why it starts at 8.

> I'm curious as to why we don't need to do anything for PPIs here - could there
> be any pending PPIs that don't get handled (I guess retargetting doesn't make
> sense for these?)

PPIs are strictly processor private and I don't think there is any way 
for one processor to set them on another processor.  So the only way to 
cope with those is for the outbound CPU to save and disable its PPI 
state, and let the inbound CPU enable them.  That should be up to each 
PPI-using driver to do via the cpu_pm_enter()/cpu_pm_exit() notifiers.
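
For illustration only, a minimal sketch of what such a driver hook could
look like (my_ppi_save()/my_ppi_restore() are hypothetical driver-specific
helpers, not existing functions; the cpu_pm notifier API itself is the real
one from <linux/cpu_pm.h>):

        #include <linux/cpu_pm.h>
        #include <linux/init.h>
        #include <linux/notifier.h>

        /* hypothetical driver-specific helpers, shown as empty stubs here */
        static void my_ppi_save(void) { /* save + disable this CPU's PPI state */ }
        static void my_ppi_restore(void) { /* restore + re-enable this CPU's PPI state */ }

        static int my_ppi_cpu_pm_notify(struct notifier_block *nb,
                                        unsigned long action, void *data)
        {
                switch (action) {
                case CPU_PM_ENTER:              /* outbound CPU, before the switch */
                        my_ppi_save();
                        break;
                case CPU_PM_EXIT:               /* inbound CPU, after the switch */
                        my_ppi_restore();
                        break;
                }
                return NOTIFY_OK;
        }

        static struct notifier_block my_ppi_cpu_pm_nb = {
                .notifier_call = my_ppi_cpu_pm_notify,
        };

        /* would be called once from the driver's own init/probe path */
        static int __init my_ppi_pm_init(void)
        {
                return cpu_pm_register_notifier(&my_ppi_cpu_pm_nb);
        }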

I've amended the patch with the following:

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 6bd5a8c1aa..268874ac75 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -668,7 +668,7 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 #ifdef CONFIG_BL_SWITCHER
 /*
- * gic_migrate_target - migrate IRQs to another PU interface
+ * gic_migrate_target - migrate IRQs to another CPU interface
  *
  * @new_cpu_id: the CPU target ID to migrate IRQs to
  *
@@ -679,10 +679,10 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
  */
 void gic_migrate_target(unsigned int new_cpu_id)
 {
-	unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
+	unsigned int cur_cpu_id, gic_irqs, gic_nr = 0;
 	void __iomem *dist_base;
 	int i, ror_val, cpu = smp_processor_id();
-	u32 val, old_mask, active_mask;
+	u32 val, cur_target_mask, active_mask;
 
 	if (gic_nr >= MAX_GIC_NR)
 		BUG();
@@ -692,17 +692,23 @@ void gic_migrate_target(unsigned int new_cpu_id)
 		return;
 	gic_irqs = gic_data[gic_nr].gic_irqs;
 
-	old_cpu_id = __ffs(gic_cpu_map[cpu]);
-	old_mask = 0x01010101 << old_cpu_id;
-	ror_val = (old_cpu_id - new_cpu_id) & 31;
+	cur_cpu_id = __ffs(gic_cpu_map[cpu]);
+	cur_target_mask = 0x01010101 << cur_cpu_id;
+	ror_val = (cur_cpu_id - new_cpu_id) & 31;
 
 	raw_spin_lock(&irq_controller_lock);
 
+	/* Update the target interface for this logical CPU */
 	gic_cpu_map[cpu] = 1 << new_cpu_id;
 
+	/*
+	 * Find all the peripheral interrupts targetting the current
+	 * CPU interface and migrate them to the new CPU interface.
+	 * We skip DIST_TARGET 0 to 7 as they are read-only.
+	 */
 	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
 		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
-		active_mask = val & old_mask;
+		active_mask = val & cur_target_mask;
 		if (active_mask) {
 			val &= ~active_mask;
 			val |= ror32(active_mask, ror_val);
@@ -714,7 +720,7 @@ void gic_migrate_target(unsigned int new_cpu_id)
 
 	/*
 	 * Now let's migrate and clear any potential SGIs that might be
-	 * pending for us (old_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
+	 * pending for us (cur_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
 	 * is a banked register, we can only forward the SGI using
 	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
 	 * doesn't use that information anyway.
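
As an aside, for anyone puzzling over the mask/rotate trick in that loop,
here is a small standalone illustration (not part of the patch; plain C
with a made-up ITARGETSR value).  Each GIC_DIST_TARGET word packs four
interrupts, one byte each, with one bit per CPU interface, so rotating the
selected bits by (cur_cpu_id - new_cpu_id) & 31 moves the target from the
current interface to the new one in all four bytes at once:

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t ror32(uint32_t word, unsigned int shift)
        {
                shift &= 31;
                if (!shift)
                        return word;
                return (word >> shift) | (word << (32 - shift));
        }

        int main(void)
        {
                unsigned int cur_cpu_id = 1, new_cpu_id = 5;
                uint32_t cur_target_mask = 0x01010101 << cur_cpu_id;    /* 0x02020202 */
                unsigned int ror_val = (cur_cpu_id - new_cpu_id) & 31;  /* 28 */
                uint32_t val = 0x02400102;               /* made-up ITARGETSR contents */
                uint32_t active_mask = val & cur_target_mask;           /* 0x02000002 */

                val &= ~active_mask;
                val |= ror32(active_mask, ror_val);      /* interface 1 -> interface 5 */
                printf("0x%08x\n", (unsigned int)val);   /* prints 0x20400120 */
                return 0;
        }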

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-25 17:15   ` Jonathan Austin
@ 2013-07-25 20:20     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-25 20:20 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 25 Jul 2013, Jonathan Austin wrote:

> Hi Nico,
> 
> I've got a few more questions here...
> 
> On 23/07/13 04:31, Nicolas Pitre wrote:
> > This is the core code implementing big.LITTLE switcher functionality.
> > Rationale for this code is available here:
> > 
> > http://lwn.net/Articles/481055/
> > 
> > The main entry point for a switch request is:
> > 
> > void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> > 
> > If the calling CPU is not the wanted one, this wrapper takes care of
> > sending the request to the appropriate CPU with schedule_work_on().
> > 
> > At the moment the core switch operation is handled by bL_switch_to()
> > which must be called on the CPU for which a switch is requested.
> > 
> > What this code does:
> > 
> >    * Return early if the current cluster is the wanted one.
> > 
> >    * Close the gate in the kernel entry vector for both the inbound
> >      and outbound CPUs.
> > 
> >    * Wake up the inbound CPU so it can perform its reset sequence in
> >      parallel up to the kernel entry vector gate.
> > 
> >    * Migrate all interrupts in the GIC targeting the outbound CPU
> >      interface to the inbound CPU interface, including SGIs. This is
> >      performed by gic_migrate_target() in drivers/irqchip/irq-gic.c.
> > 
> >    * Call cpu_pm_enter() which takes care of flushing the VFP state to
> >      RAM and save the CPU interface config from the GIC to RAM.
> > 
> >    * Modify the cpu_logical_map to refer to the inbound physical CPU.
> > 
> >    * Call cpu_suspend() which saves the CPU state (general purpose
> >      registers, page table address) onto the stack and store the
> >      resulting stack pointer in an array indexed by the updated
> >      cpu_logical_map, then call the provided shutdown function.
> >      This happens in arch/arm/kernel/sleep.S.
> > 
> > At this point, the provided shutdown function executed by the outbound
> > CPU ungates the inbound CPU. Therefore the inbound CPU:
> > 
> >    * Picks up the saved stack pointer in the array indexed by its MPIDR
> >      in arch/arm/kernel/sleep.S.
> > 
> >    * The MMU and caches are re-enabled using the saved state on the
> >      provided stack, just like if this was a resume operation from a
> >      suspended state.
> > 
> >    * Then cpu_suspend() returns, although this is on the inbound CPU
> >      rather than the outbound CPU which called it initially.
> > 
> >    * The function cpu_pm_exit() is called which effect is to restore the
> >      CPU interface state in the GIC using the state previously saved by
> >      the outbound CPU.
> > 
> >    * Exit of bL_switch_to() to resume normal kernel execution on the
> >      new CPU.
> > 
> > However, the outbound CPU is potentially still running in parallel while
> > the inbound CPU is resuming normal kernel execution, hence we need
> > per CPU stack isolation to execute bL_do_switch().  After the outbound
> > CPU has ungated the inbound CPU, it calls mcpm_cpu_power_down() to:
> > 
> >    * Clean its L1 cache.
> > 
> >    * If it is the last CPU still alive in its cluster (last man standing),
> >      it also cleans its L2 cache and disables cache snooping from the other
> >      cluster.
> > 
> >    * Power down the CPU (or whole cluster).
> > 
> > Code called from bL_do_switch() might end up referencing 'current' for
> > some reasons.  However, 'current' is derived from the stack pointer.
> > With any arbitrary stack, the returned value for 'current' and any
> > dereferenced values through it are just random garbage which may lead to
> > segmentation faults.
> > 
> > The active page table during the execution of bL_do_switch() is also a
> > problem.  There is no guarantee that the inbound CPU won't destroy the
> > corresponding task which would free the attached page table while the
> > outbound CPU is still running and relying on it.
> > 
> > To solve both issues, we borrow some of the task space belonging to
> > the init/idle task which, by its nature, is lightly used and therefore
> > is unlikely to clash with our usage.  The init task is also never going
> > away.
> 
> I had written a comment here about this looking hairy, and asking whether
> you'd considered using dedicated threads for this.
> 
> As I hit the later patches in the series I see that's exactly what you've
> done. Is it really worth keeping the historical solution in here?

I think so.  First, because it is much less code if this is introduced 
without kernel threads while still being fully functional.  Next, even 
with dedicated threads, we do have to play some games with the stack 
nevertheless, although to a lesser extent.  And I'm sure some people 
would have argued that this would have been better without kernel 
threads if they didn't see how it was without them.

> Could build/bisectability be maintained by moving the Kconfig options 
> later in the series, but without requiring the weird init/idle hack to 
> be kept around?

I don't follow you here.

> > Right now the logical CPU number is assumed to be equivalent to the
> > physical CPU number within each cluster. The kernel should also be
> > booted with only one cluster active.  These limitations will be lifted
> > eventually.
> > 
> 
> I just read this aloud in the office and Dave Martin mentioned that the MPIDR
> hashing schemes make this obsolete now?

Only in the suspend/resume code path, not in the switcher proper yet. 
That's yet another level of complexity that I prefer to see introduced 
later.

> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> >   arch/arm/Kconfig                   |  17 +++
> >   arch/arm/common/Makefile           |   1 +
> >   arch/arm/common/bL_switcher.c      | 247
> > +++++++++++++++++++++++++++++++++++++
> >   arch/arm/include/asm/bL_switcher.h |  17 +++
> >   4 files changed, 282 insertions(+)
> >   create mode 100644 arch/arm/common/bL_switcher.c
> >   create mode 100644 arch/arm/include/asm/bL_switcher.h
> > 
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > index ba412e02ec..2c9e5bf734 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -1538,6 +1538,23 @@ config MCPM
> >            for (multi-)cluster based systems, such as big.LITTLE based
> >            systems.
> > 
> > +config BIG_LITTLE
> > +       bool "big.LITTLE support (Experimental)"
> > +       depends on CPU_V7 && SMP
> > +       select MCPM
> > +       help
> > +         This option enables support for the big.LITTLE architecture.
> 
> I'm not sure we should describe this as an architecture?
> 
> "This option enables support for big.LITTLE processing"?

Well, maybe "system architecture".  I've certainly seen that somewhere 
in the literature from ARM.

What about:

	  This option enables support selections for the big.LITTLE
	  system architecture.

> What are the plans for/implications of having CONFIG_BIG_LITTLE and
> !CONFIG_BL_SWITCHER?

There is a whole set of config options that should depend on 
CONFIG_BIG_LITTLE, such as the b.L cpufreq driver, or the b.L cpuidle 
driver just posted by Lorenzo (which currently depends on a TC2 config 
but that ought not to stay that way).  So you may decide to have 
big.LITTLE goodies like that but not the switcher.  The switcher is 
always optional, and it might become even more so when proper support 
for HMP scheduling is integrated into the scheduler. Hence the slightly 
different wording for the CONFIG_BIG_LITTLE option I'm suggesting above.

> > +config BL_SWITCHER
> > +       bool "big.LITTLE switcher support"
> > +       depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
> > +       select CPU_PM
> > +       select ARM_CPU_SUSPEND
> > +       help
> > +         The big.LITTLE "switcher" provides the core functionality to
> > +         transparently handle transition between a cluster of A15's
> > +         and a cluster of A7's in a big.LITTLE system.
> > +
> >   choice
> >          prompt "Memory split"
> >          default VMSPLIT_3G
> > diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
> > index 8c60f473e9..2586240d5a 100644
> > --- a/arch/arm/common/Makefile
> > +++ b/arch/arm/common/Makefile
> > @@ -17,3 +17,4 @@ obj-$(CONFIG_MCPM)            += mcpm_head.o mcpm_entry.o
> > mcpm_platsmp.o vlock.o
> >   AFLAGS_mcpm_head.o             := -march=armv7-a
> >   AFLAGS_vlock.o                 := -march=armv7-a
> >   obj-$(CONFIG_TI_PRIV_EDMA)     += edma.o
> > +obj-$(CONFIG_BL_SWITCHER)      += bL_switcher.o
> > diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> > new file mode 100644
> > index 0000000000..e63881b430
> > --- /dev/null
> > +++ b/arch/arm/common/bL_switcher.c
> > @@ -0,0 +1,247 @@
> > +/*
> > + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
> > + *
> > + * Created by: Nicolas Pitre, March 2012
> > + * Copyright:  (C) 2012-2013  Linaro Limited
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/init.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/sched.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/cpu_pm.h>
> > +#include <linux/workqueue.h>
> > +#include <linux/mm.h>
> > +#include <linux/string.h>
> > +#include <linux/irqchip/arm-gic.h>
> > +
> > +#include <asm/smp_plat.h>
> > +#include <asm/cacheflush.h>
> > +#include <asm/suspend.h>
> > +#include <asm/mcpm.h>
> > +#include <asm/bL_switcher.h>
> > +
> > +
> > +/*
> > + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
> > + * __attribute_const__ and we don't want the compiler to assume any
> > + * constness here as the value _does_ change along some code paths.
> > + */
> > +
> > +static int read_mpidr(void)
> > +{
> > +       unsigned int id;
> > +       asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
> 
> Is the \t in here an artefact of our mail servers? I notice that in other
> places, for example arch/arm/include/asm/cp15.h, we don't have that
> formatting.

Well, if you have the inline asm statement on a single line, the 
traditional tab between the mnemonic and its argument might not fit well 
with its actual alignment on the screen.  You can find this "style" in 
arch/arm/include/asm/barrier.h or arch/arm/include/asm/bitops.h.

But I agree it is a little ugly.  I've replaced it with a simple space.
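
That is, with the tab replaced by a plain space the statement now reads:

        asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (id));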

> > +       return id & MPIDR_HWID_BITMASK;
> > +}
> > +
> > +/*
> > + * bL switcher core code.
> > + */
> > +
> > +static void bL_do_switch(void *_unused)
> > +{
> > +       unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
> > +
> > +       /*
> > +        * We now have a piece of stack borrowed from the init task's.
> > +        * Let's also switch to init_mm right away to match it.
> > +        */
> > +       cpu_switch_mm(init_mm.pgd, &init_mm);
> > +
> > +       pr_debug("%s\n", __func__);
> 
> This looks like fairly uninformative debug - is it left here intentionally? If
> so, isn't there a better message to print than just the function name?

My friend, when this function was only called for the first time, I was 
a happy camper.  So simply knowing that this is being called at all is 
already highly valuable debugging information.

> > +       mpidr = read_mpidr();
> > +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       ob_cluster = clusterid;
> > +       ib_cluster = clusterid ^ 1;
> > +
> > +       /*
> > +        * Our state has been saved at this point.  Let's release our
> > +        * inbound CPU.
> > +        */
> > +       mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
> > +       sev();
> > +
> > +       /*
> > +        * From this point, we must assume that our counterpart CPU might
> > +        * have taken over in its parallel world already, as if execution
> > +        * just returned from cpu_suspend().  It is therefore important to
> > +        * be very careful not to make any change the other guy is not
> > +        * expecting.  This is why we need stack isolation.
> > +        *
> > +        * Fancy under cover tasks could be performed here.  For now
> > +        * we have none.
> > +        */
> > +
> > +       /* Let's put ourself down. */
> > +       mcpm_cpu_power_down();
> > +
> > +       /* should never get here */
> > +       BUG();
> > +}
> > +
> > +/*
> > + * Stack isolation.  To ensure 'current' remains valid, we just borrow
> > + * a slice of the init/idle task which should be fairly lightly used.
> > + * The borrowed area starts just above the thread_info structure located
> > + * at the very bottom of the stack, aligned to a cache line.
> > + */
> > +#define STACK_SIZE 256
> > +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
> > +static int bL_switchpoint(unsigned long _arg)
> > +{
> > +       unsigned int mpidr = read_mpidr();
> > +       unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER;
> > +       void *stack = &init_thread_info + 1;
> > +       stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
> 
> Are there any implications if there were big.LITTLE cores with different cache
> line sizes? The LWN article that you reference mentions different cache
> topologies...

This should always be the largest of all L1 cache line sizes in the 
system.  So that is fine here.

> > +       stack += cpu_index * STACK_SIZE + STACK_SIZE;
> > +       call_with_stack(bL_do_switch, (void *)_arg, stack);
> > +       BUG();
> > +}
> > +
> > +/*
> > + * Generic switcher interface
> > + */
> > +
> > +/*
> > + * bL_switch_to - Switch to a specific cluster for the current CPU
> > + * @new_cluster_id: the ID of the cluster to switch to.
> > + *
> > + * This function must be called on the CPU to be switched.
> > + * Returns 0 on success, else a negative status code.
> > + */
> > +static int bL_switch_to(unsigned int new_cluster_id)
> > +{
> > +       unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster,
> > this_cpu;
> > +       int ret;
> > +
> > +       mpidr = read_mpidr();
> > +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       ob_cluster = clusterid;
> > +       ib_cluster = clusterid ^ 1;
> > +
> > +       if (new_cluster_id == clusterid)
> > +               return 0;
> > +
> > +       pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
> > +
> > +       /* Close the gate for our entry vectors */
> > +       mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
> > +       mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
> > +
> > +       /*
> > +        * Let's wake up the inbound CPU now in case it requires some delay
> > +        * to come online, but leave it gated in our entry vector code.
> > +        */
> > +       ret = mcpm_cpu_power_up(cpuid, ib_cluster);
> > +       if (ret) {
> > +               pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__,
> > ret);
> > +               return ret;
> > +       }
> > +
> > +       /*
> > +        * From this point we are entering the switch critical zone
> > +        * and can't sleep/schedule anymore.
> > +        */
> 
> Will this function ever be called from anywhere other than bL_switch_request?
> As far as I could see, bl_switch_request uses two different methods of
> ensuring that we will still be on the same CPU throughout this function (if
> called directly the get_cpu and put_cpu dis/enable pre-emption and via the
> work queue we're guaranteed the same logical cpu). The different approaches
> somewhat obfuscate the need for something to have been done even before this
> point to stop us getting migrated to another CPU, I think...

True.  This comment is especially valid when moving to dedicated kernel 
threads but makes no sense here.  It probably migrated to this patch by 
mistake during one of the cleanups.  Right here in this patch it should 
read "... and can't take any interrupts anymore."

> > +       local_irq_disable();
> > +       local_fiq_disable();
> > +
> > +       this_cpu = smp_processor_id();
> > +
> > +       /* redirect GIC's SGIs to our counterpart */
> > +       gic_migrate_target(cpuid + ib_cluster*4);
> > +
> > +       /*
> > +        * Raise a SGI on the inbound CPU to make sure it doesn't stall
> > +        * in a possible WFI, such as in mcpm_power_down().
> > +        */
> > +       arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
> > +
> > +       ret = cpu_pm_enter();
> > +
> > +       /* we can not tolerate errors at this point */
> > +       if (ret)
> > +               panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
> > +
> > +       /*
> > +        * Flip the cluster in the CPU logical map for this CPU.
> > +        * This must be flushed to RAM as the resume code
> > +        * needs to access it while the caches are still disabled.
> > +        */
> > +       cpu_logical_map(this_cpu) ^= (1 << 8);
> > +       sync_cache_w(&cpu_logical_map(this_cpu));
> > +
> > +       /* Let's do the actual CPU switch. */
> > +       ret = cpu_suspend(0, bL_switchpoint);
> > +       if (ret > 0)
> > +               panic("%s: cpu_suspend() returned %d\n", __func__, ret);
> > +
> > +       /* We are executing on the inbound CPU at this point */
> 
> Very cool :)
> 
> > +       mpidr = read_mpidr();
> > +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid);
> 
> Do we want a 'switcher' prefix on these? "bL_switcher: after switch..." etc?

I don't think busting the 80 column limit for this is worth it.
And when you turn on debugging you certainly know exactly what messages 
you're looking for.

> > +       BUG_ON(clusterid != ib_cluster);
> > +
> > +       mcpm_cpu_powered_up();
> > +
> > +       ret = cpu_pm_exit();
> > +
> > +       local_fiq_enable();
> > +       local_irq_enable();
> > +
> > +       if (ret)
> > +               pr_err("%s exiting with error %d\n", __func__, ret);
> > +       return ret;
> > +}
> > +
> > +struct switch_args {
> > +       unsigned int cluster;
> > +       struct work_struct work;
> > +};
> > +
> > +static void __bL_switch_to(struct work_struct *work)
> > +{
> > +       struct switch_args *args = container_of(work, struct switch_args,
> > work);
> > +       bL_switch_to(args->cluster);
> > +}
> > +
> > +/*
> > + * bL_switch_request - Switch to a specific cluster for the given CPU
> > + *
> > + * @cpu: the CPU to switch
> > + * @new_cluster_id: the ID of the cluster to switch to.
> > + *
> > + * This function causes a cluster switch on the given CPU.  If the given
> > + * CPU is the same as the calling CPU then the switch happens right away.
> > + * Otherwise the request is put on a work queue to be scheduled on the
> > + * remote CPU.
> > + */
> > +void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> > +{
> > +       unsigned int this_cpu = get_cpu();
> > +       struct switch_args args;
> > +
> > +       if (cpu == this_cpu) {
> > +               bL_switch_to(new_cluster_id);
> > +               put_cpu();
> > +               return;
> > +       }
> > +       put_cpu();
> > +
> > +       args.cluster = new_cluster_id;
> > +       INIT_WORK_ONSTACK(&args.work, __bL_switch_to);
> > +       schedule_work_on(cpu, &args.work);
> 
> The comment at the top of schedule_work_on() says:
>  [...]
>  * We queue the work to a specific CPU, the caller must ensure it
>  * can't go away.
>  [...]
> 
> I see we don't disable cpu hotplug requests until later in the series - does
> that matter? Until that patch, it seems that we could hotplug off the core
> that we'd scheduled the work onto?

Much worse than that could happen if e.g. someone were to hotplug in a 
counterpart CPU here.  So really this is not something I'd worry about 
for this early patch in the series.

> Hope some of those questions are helpful,

Absolutely.


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-23  3:31 ` [PATCH 03/13] ARM: b.L: core switcher code Nicolas Pitre
  2013-07-25 17:15   ` Jonathan Austin
@ 2013-07-26 10:45   ` Lorenzo Pieralisi
  2013-07-26 14:29     ` Nicolas Pitre
  2013-07-26 14:53   ` Russell King - ARM Linux
  2 siblings, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-26 10:45 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 23, 2013 at 04:31:19AM +0100, Nicolas Pitre wrote:
> This is the core code implementing big.LITTLE switcher functionality.
> Rationale for this code is available here:
> 
> http://lwn.net/Articles/481055/
> 
> The main entry point for a switch request is:
> 
> void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> 
> If the calling CPU is not the wanted one, this wrapper takes care of
> sending the request to the appropriate CPU with schedule_work_on().
> 
> At the moment the core switch operation is handled by bL_switch_to()
> which must be called on the CPU for which a switch is requested.
> 
> What this code does:
> 
>   * Return early if the current cluster is the wanted one.
> 
>   * Close the gate in the kernel entry vector for both the inbound
>     and outbound CPUs.
> 
>   * Wake up the inbound CPU so it can perform its reset sequence in
>     parallel up to the kernel entry vector gate.
> 
>   * Migrate all interrupts in the GIC targeting the outbound CPU
>     interface to the inbound CPU interface, including SGIs. This is
>     performed by gic_migrate_target() in drivers/irqchip/irq-gic.c.
> 
>   * Call cpu_pm_enter() which takes care of flushing the VFP state to
>     RAM and save the CPU interface config from the GIC to RAM.

If I read the code properly, you always resume on the same logical cpu id
you suspended on (different HW CPU of course), correct? This is to validate
my assumptions on CPU PM notifiers and other code.

> 
>   * Modify the cpu_logical_map to refer to the inbound physical CPU.
> 
>   * Call cpu_suspend() which saves the CPU state (general purpose
>     registers, page table address) onto the stack and store the
>     resulting stack pointer in an array indexed by the updated
>     cpu_logical_map, then call the provided shutdown function.
>     This happens in arch/arm/kernel/sleep.S.
> 
> At this point, the provided shutdown function executed by the outbound
> CPU ungates the inbound CPU. Therefore the inbound CPU:
> 
>   * Picks up the saved stack pointer in the array indexed by its MPIDR
>     in arch/arm/kernel/sleep.S.
> 
>   * The MMU and caches are re-enabled using the saved state on the
>     provided stack, just like if this was a resume operation from a
>     suspended state.
> 
>   * Then cpu_suspend() returns, although this is on the inbound CPU
>     rather than the outbound CPU which called it initially.
> 
>   * The function cpu_pm_exit() is called which effect is to restore the
>     CPU interface state in the GIC using the state previously saved by
>     the outbound CPU.
> 
>   * Exit of bL_switch_to() to resume normal kernel execution on the
>     new CPU.
> 
> However, the outbound CPU is potentially still running in parallel while
> the inbound CPU is resuming normal kernel execution, hence we need
> per CPU stack isolation to execute bL_do_switch().  After the outbound
> CPU has ungated the inbound CPU, it calls mcpm_cpu_power_down() to:
> 
>   * Clean its L1 cache.
> 
>   * If it is the last CPU still alive in its cluster (last man standing),
>     it also cleans its L2 cache and disables cache snooping from the other
>     cluster.
> 
>   * Power down the CPU (or whole cluster).
> 
> Code called from bL_do_switch() might end up referencing 'current' for
> some reasons.  However, 'current' is derived from the stack pointer.
> With any arbitrary stack, the returned value for 'current' and any
> dereferenced values through it are just random garbage which may lead to
> segmentation faults.
> 
> The active page table during the execution of bL_do_switch() is also a
> problem.  There is no guarantee that the inbound CPU won't destroy the
> corresponding task which would free the attached page table while the
> outbound CPU is still running and relying on it.

Yes this is really a sticking point, some comments below.

> To solve both issues, we borrow some of the task space belonging to
> the init/idle task which, by its nature, is lightly used and therefore
> is unlikely to clash with our usage.  The init task is also never going
> away.

Unlikely does not mean it will always work, and I think it is best
avoided. Better to stick to kernel threads, or a temporary stack, but I
understand that for a temporary stack to work you have to force constraints
on the MCPM backend that you do not necessarily want. More below.

> Right now the logical CPU number is assumed to be equivalent to the
> physical CPU number within each cluster. The kernel should also be
> booted with only one cluster active.  These limitations will be lifted
> eventually.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/Kconfig                   |  17 +++
>  arch/arm/common/Makefile           |   1 +
>  arch/arm/common/bL_switcher.c      | 247 +++++++++++++++++++++++++++++++++++++
>  arch/arm/include/asm/bL_switcher.h |  17 +++
>  4 files changed, 282 insertions(+)
>  create mode 100644 arch/arm/common/bL_switcher.c
>  create mode 100644 arch/arm/include/asm/bL_switcher.h
> 
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index ba412e02ec..2c9e5bf734 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -1538,6 +1538,23 @@ config MCPM
>           for (multi-)cluster based systems, such as big.LITTLE based
>           systems.
> 
> +config BIG_LITTLE
> +       bool "big.LITTLE support (Experimental)"
> +       depends on CPU_V7 && SMP
> +       select MCPM
> +       help
> +         This option enables support for the big.LITTLE architecture.

Probably this belongs in a separate patch (CPU idle driver probably needs
it too). Not a big deal though (and honestly I do not know how you can
justify a single patch for that - we need to add this config option, somehow).

> +config BL_SWITCHER
> +       bool "big.LITTLE switcher support"
> +       depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
> +       select CPU_PM
> +       select ARM_CPU_SUSPEND
> +       help
> +         The big.LITTLE "switcher" provides the core functionality to
> +         transparently handle transition between a cluster of A15's
> +         and a cluster of A7's in a big.LITTLE system.
> +
>  choice
>         prompt "Memory split"
>         default VMSPLIT_3G
> diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
> index 8c60f473e9..2586240d5a 100644
> --- a/arch/arm/common/Makefile
> +++ b/arch/arm/common/Makefile
> @@ -17,3 +17,4 @@ obj-$(CONFIG_MCPM)            += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
>  AFLAGS_mcpm_head.o             := -march=armv7-a
>  AFLAGS_vlock.o                 := -march=armv7-a
>  obj-$(CONFIG_TI_PRIV_EDMA)     += edma.o
> +obj-$(CONFIG_BL_SWITCHER)      += bL_switcher.o
> diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> new file mode 100644
> index 0000000000..e63881b430
> --- /dev/null
> +++ b/arch/arm/common/bL_switcher.c
> @@ -0,0 +1,247 @@
> +/*
> + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
> + *
> + * Created by: Nicolas Pitre, March 2012
> + * Copyright:  (C) 2012-2013  Linaro Limited
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/sched.h>
> +#include <linux/interrupt.h>
> +#include <linux/cpu_pm.h>
> +#include <linux/workqueue.h>
> +#include <linux/mm.h>
> +#include <linux/string.h>
> +#include <linux/irqchip/arm-gic.h>
> +
> +#include <asm/smp_plat.h>
> +#include <asm/cacheflush.h>
> +#include <asm/suspend.h>
> +#include <asm/mcpm.h>
> +#include <asm/bL_switcher.h>
> +
> +
> +/*
> + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
> + * __attribute_const__ and we don't want the compiler to assume any
> + * constness here as the value _does_ change along some code paths.
> + */
> +
> +static int read_mpidr(void)
> +{
> +       unsigned int id;
> +       asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
> +       return id & MPIDR_HWID_BITMASK;
> +}
> +
> +/*
> + * bL switcher core code.
> + */
> +
> +static void bL_do_switch(void *_unused)
> +{
> +       unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
> +
> +       /*
> +        * We now have a piece of stack borrowed from the init task's.
> +        * Let's also switch to init_mm right away to match it.
> +        */
> +       cpu_switch_mm(init_mm.pgd, &init_mm);

Is this cpu_switch_mm necessary? You are executing it to make sure you
get to wfi with page tables that can't be deallocated by the running inbound
CPU, if I got it right. It is not needed to use the init stack, but you know
that.

> +
> +       pr_debug("%s\n", __func__);
> +
> +       mpidr = read_mpidr();
> +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       ob_cluster = clusterid;
> +       ib_cluster = clusterid ^ 1;
> +
> +       /*
> +        * Our state has been saved at this point.  Let's release our
> +        * inbound CPU.
> +        */
> +       mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
> +       sev();
> +
> +       /*
> +        * From this point, we must assume that our counterpart CPU might
> +        * have taken over in its parallel world already, as if execution
> +        * just returned from cpu_suspend().  It is therefore important to
> +        * be very careful not to make any change the other guy is not
> +        * expecting.  This is why we need stack isolation.
> +        *
> +        * Fancy under cover tasks could be performed here.  For now
> +        * we have none.
> +        */
> +
> +       /* Let's put ourself down. */
> +       mcpm_cpu_power_down();
> +
> +       /* should never get here */
> +       BUG();
> +}
> +
> +/*
> + * Stack isolation.  To ensure 'current' remains valid, we just borrow
> + * a slice of the init/idle task which should be fairly lightly used.
> + * The borrowed area starts just above the thread_info structure located
> + * at the very bottom of the stack, aligned to a cache line.
> + */
> +#define STACK_SIZE 256
> +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
> +static int bL_switchpoint(unsigned long _arg)
> +{
> +       unsigned int mpidr = read_mpidr();
> +       unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER;
> +       void *stack = &init_thread_info + 1;
> +       stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
> +       stack += cpu_index * STACK_SIZE + STACK_SIZE;

I do not like this much, for two reasons. Firstly because it might not work,
secondly because whatever current usage we do from that point onwards
is attached to the init thread, and I am not sure that's 100% safe either.

Moving to kernel threads suffers from the same issue, even though I
should grant that there it is easier to monitor stack usage and to safely grab
some starting at the bottom. At least the current pointer behaviour is kept
coherent. Overall the risk of using current on both outbound and inbound
CPUs at once with no coordination is a bit scary; probably it is best to
prevent current pointer usage altogether (but this forces constraints on
MCPM). I will give it more thought.

> +       call_with_stack(bL_do_switch, (void *)_arg, stack);
> +       BUG();
> +}
> +
> +/*
> + * Generic switcher interface
> + */
> +
> +/*
> + * bL_switch_to - Switch to a specific cluster for the current CPU
> + * @new_cluster_id: the ID of the cluster to switch to.
> + *
> + * This function must be called on the CPU to be switched.
> + * Returns 0 on success, else a negative status code.
> + */
> +static int bL_switch_to(unsigned int new_cluster_id)
> +{
> +       unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
> +       int ret;
> +
> +       mpidr = read_mpidr();
> +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +       ob_cluster = clusterid;
> +       ib_cluster = clusterid ^ 1;
> +
> +       if (new_cluster_id == clusterid)
> +               return 0;
> +
> +       pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
> +
> +       /* Close the gate for our entry vectors */
> +       mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
> +       mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
> +
> +       /*
> +        * Let's wake up the inbound CPU now in case it requires some delay
> +        * to come online, but leave it gated in our entry vector code.
> +        */
> +       ret = mcpm_cpu_power_up(cpuid, ib_cluster);
> +       if (ret) {
> +               pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
> +               return ret;
> +       }
> +
> +       /*
> +        * From this point we are entering the switch critical zone
> +        * and can't sleep/schedule anymore.
> +        */
> +       local_irq_disable();
> +       local_fiq_disable();
> +
> +       this_cpu = smp_processor_id();
> +
> +       /* redirect GIC's SGIs to our counterpart */
> +       gic_migrate_target(cpuid + ib_cluster*4);
> +
> +       /*
> +        * Raise a SGI on the inbound CPU to make sure it doesn't stall
> +        * in a possible WFI, such as in mcpm_power_down().
> +        */
> +       arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
> +
> +       ret = cpu_pm_enter();
> +
> +       /* we can not tolerate errors at this point */
> +       if (ret)
> +               panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
> +
> +       /*
> +        * Flip the cluster in the CPU logical map for this CPU.
> +        * This must be flushed to RAM as the resume code
> +        * needs to access it while the caches are still disabled.
> +        */
> +       cpu_logical_map(this_cpu) ^= (1 << 8);
> +       sync_cache_w(&cpu_logical_map(this_cpu));

sync_cache_w() is not needed anymore; cpu_resume does not use cpu_logical_map.

Thanks,
Lorenzo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-25 16:06         ` Nicolas Pitre
@ 2013-07-26 11:31           ` Lorenzo Pieralisi
  2013-07-26 14:41             ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-26 11:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> On Thu, 25 Jul 2013, Dave Martin wrote:
> 
> > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > 
> > > > But this patch commits us to requiring that on the suspend path 
> > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > 
> > > What comment would you suggest?  I want to make sure the possible 
> > > confusion you see is properly addressed.
> > 
> > I think we just need to state that the value of
> > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > migration, cpu_logical_map() must be updated appropriately somewhere
> > between cpu_pm_enter() and cpu_suspend().
> 
> Excellent.  I've amended the patch with this:
> 
> diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> index 2835d35234..caf938db62 100644
> --- a/arch/arm/kernel/suspend.c
> +++ b/arch/arm/kernel/suspend.c
> @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
>  /*
>   * Hide the first two arguments to __cpu_suspend - these are an implementation
>   * detail which platform code shouldn't have to know about.
> + *
> + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
>   */
>  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  {
> 
> I've put it against cpu_suspend() rather than __cpu_suspend() as this 
> is what users should care about.
> 
> ACK?

We need this patch to allow IKS to store a cpu context at a specific
index, let's be honest. It is a moot point and I am not very happy
about changing this code for a very specific usage, but the way the code is
implemented makes the change acceptable. I really do not think we should
write guidelines on how cpu_suspend users have to change cpu_logical_map,
though; that's not needed apart from IKS and it should be limited to IKS
code only.

Again, that's just my opinion, but the cpu_suspend API must be kept as it is
and we should not encourage people to use it in creative ways.

Lorenzo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-26 10:45   ` Lorenzo Pieralisi
@ 2013-07-26 14:29     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-26 14:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:

> On Tue, Jul 23, 2013 at 04:31:19AM +0100, Nicolas Pitre wrote:
> > This is the core code implementing big.LITTLE switcher functionality.
> > Rationale for this code is available here:
> > 
> > http://lwn.net/Articles/481055/
> > 
> > The main entry point for a switch request is:
> > 
> > void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> > 
> > If the calling CPU is not the wanted one, this wrapper takes care of
> > sending the request to the appropriate CPU with schedule_work_on().
> > 
> > At the moment the core switch operation is handled by bL_switch_to()
> > which must be called on the CPU for which a switch is requested.
> > 
> > What this code does:
> > 
> >   * Return early if the current cluster is the wanted one.
> > 
> >   * Close the gate in the kernel entry vector for both the inbound
> >     and outbound CPUs.
> > 
> >   * Wake up the inbound CPU so it can perform its reset sequence in
> >     parallel up to the kernel entry vector gate.
> > 
> >   * Migrate all interrupts in the GIC targeting the outbound CPU
> >     interface to the inbound CPU interface, including SGIs. This is
> >     performed by gic_migrate_target() in drivers/irqchip/irq-gic.c.
> > 
> >   * Call cpu_pm_enter() which takes care of flushing the VFP state to
> >     RAM and save the CPU interface config from the GIC to RAM.
> 
> If I read the code properly, you always resume on the same logical cpu id
> you suspended on (different HW CPU of course), correct? This is to validate
> my assumptions on CPU PM notifiers and other code.

Yes, the logical CPU number stays the same.  That's the only way to make 
a transparent migration for core kernel code.

As this patch stands, the physical CPU number will be the same as the 
logical CPU number as well (even though the cluster number changes).  
But that particular assumption is removed later on.

> > 
> >   * Modify the cpu_logical_map to refer to the inbound physical CPU.
> > 
> >   * Call cpu_suspend() which saves the CPU state (general purpose
> >     registers, page table address) onto the stack and store the
> >     resulting stack pointer in an array indexed by the updated
> >     cpu_logical_map, then call the provided shutdown function.
> >     This happens in arch/arm/kernel/sleep.S.
> > 
> > At this point, the provided shutdown function executed by the outbound
> > CPU ungates the inbound CPU. Therefore the inbound CPU:
> > 
> >   * Picks up the saved stack pointer in the array indexed by its MPIDR
> >     in arch/arm/kernel/sleep.S.
> > 
> >   * The MMU and caches are re-enabled using the saved state on the
> >     provided stack, just like if this was a resume operation from a
> >     suspended state.
> > 
> >   * Then cpu_suspend() returns, although this is on the inbound CPU
> >     rather than the outbound CPU which called it initially.
> > 
> >   * The function cpu_pm_exit() is called which effect is to restore the
> >     CPU interface state in the GIC using the state previously saved by
> >     the outbound CPU.
> > 
> >   * Exit of bL_switch_to() to resume normal kernel execution on the
> >     new CPU.
> > 
> > However, the outbound CPU is potentially still running in parallel while
> > the inbound CPU is resuming normal kernel execution, hence we need
> > per CPU stack isolation to execute bL_do_switch().  After the outbound
> > CPU has ungated the inbound CPU, it calls mcpm_cpu_power_down() to:
> > 
> >   * Clean its L1 cache.
> > 
> >   * If it is the last CPU still alive in its cluster (last man standing),
> >     it also cleans its L2 cache and disables cache snooping from the other
> >     cluster.
> > 
> >   * Power down the CPU (or whole cluster).
> > 
> > Code called from bL_do_switch() might end up referencing 'current' for
> > some reasons.  However, 'current' is derived from the stack pointer.
> > With any arbitrary stack, the returned value for 'current' and any
> > dereferenced values through it are just random garbage which may lead to
> > segmentation faults.
> > 
> > The active page table during the execution of bL_do_switch() is also a
> > problem.  There is no guarantee that the inbound CPU won't destroy the
> > corresponding task which would free the attached page table while the
> > outbound CPU is still running and relying on it.
> 
> Yes this is really a sticking point, some comments below.
> 
> > To solve both issues, we borrow some of the task space belonging to
> > the init/idle task which, by its nature, is lightly used and therefore
> > is unlikely to clash with our usage.  The init task is also never going
> > away.
> 
> Unlikely does not mean it will always work, and I think it is best
> avoided. Better to stick to kernel threads, or a temporary stack, but I
> understand that for a temporary stack to work you have to force constraints
> on the MCPM backend that you do not necessarily want. More below.

Note that this is indeed changed later on.  But I wanted the code to be 
simpler as a first introductory patch.

And because kernel code expects the 'current' pointer to be valid, even 
in unexpected places such as printk(), this stack needs to be tied to a 
thread-info struct.
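
To illustrate why (a rough sketch of what arch/arm currently does in
asm/thread_info.h plus the asm-generic current.h definitions, quoted here
only for context): 'current' is reached through the thread_info sitting at
the bottom of a THREAD_SIZE-aligned stack, so whatever stack we run on must
live inside such an area with a valid thread_info underneath it.

        static inline struct thread_info *current_thread_info(void)
        {
                register unsigned long sp asm ("sp");
                return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
        }

        #define get_current()   (current_thread_info()->task)
        #define current         get_current()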

> > Right now the logical CPU number is assumed to be equivalent to the
> > physical CPU number within each cluster. The kernel should also be
> > booted with only one cluster active.  These limitations will be lifted
> > eventually.
> > 
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> >  arch/arm/Kconfig                   |  17 +++
> >  arch/arm/common/Makefile           |   1 +
> >  arch/arm/common/bL_switcher.c      | 247 +++++++++++++++++++++++++++++++++++++
> >  arch/arm/include/asm/bL_switcher.h |  17 +++
> >  4 files changed, 282 insertions(+)
> >  create mode 100644 arch/arm/common/bL_switcher.c
> >  create mode 100644 arch/arm/include/asm/bL_switcher.h
> > 
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > index ba412e02ec..2c9e5bf734 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -1538,6 +1538,23 @@ config MCPM
> >           for (multi-)cluster based systems, such as big.LITTLE based
> >           systems.
> > 
> > +config BIG_LITTLE
> > +       bool "big.LITTLE support (Experimental)"
> > +       depends on CPU_V7 && SMP
> > +       select MCPM
> > +       help
> > +         This option enables support for the big.LITTLE architecture.
> 
> Probably this belongs in a separate patch (CPU idle driver probably needs
> it too). Not a big deal though (and honestly I do not know how you can
> justify a single patch for that - we need to add this config option, somehow).
> 
> > +config BL_SWITCHER
> > +       bool "big.LITTLE switcher support"
> > +       depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
> > +       select CPU_PM
> > +       select ARM_CPU_SUSPEND
> > +       help
> > +         The big.LITTLE "switcher" provides the core functionality to
> > +         transparently handle transition between a cluster of A15's
> > +         and a cluster of A7's in a big.LITTLE system.
> > +
> >  choice
> >         prompt "Memory split"
> >         default VMSPLIT_3G
> > diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
> > index 8c60f473e9..2586240d5a 100644
> > --- a/arch/arm/common/Makefile
> > +++ b/arch/arm/common/Makefile
> > @@ -17,3 +17,4 @@ obj-$(CONFIG_MCPM)            += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
> >  AFLAGS_mcpm_head.o             := -march=armv7-a
> >  AFLAGS_vlock.o                 := -march=armv7-a
> >  obj-$(CONFIG_TI_PRIV_EDMA)     += edma.o
> > +obj-$(CONFIG_BL_SWITCHER)      += bL_switcher.o
> > diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> > new file mode 100644
> > index 0000000000..e63881b430
> > --- /dev/null
> > +++ b/arch/arm/common/bL_switcher.c
> > @@ -0,0 +1,247 @@
> > +/*
> > + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
> > + *
> > + * Created by: Nicolas Pitre, March 2012
> > + * Copyright:  (C) 2012-2013  Linaro Limited
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/init.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/sched.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/cpu_pm.h>
> > +#include <linux/workqueue.h>
> > +#include <linux/mm.h>
> > +#include <linux/string.h>
> > +#include <linux/irqchip/arm-gic.h>
> > +
> > +#include <asm/smp_plat.h>
> > +#include <asm/cacheflush.h>
> > +#include <asm/suspend.h>
> > +#include <asm/mcpm.h>
> > +#include <asm/bL_switcher.h>
> > +
> > +
> > +/*
> > + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
> > + * __attribute_const__ and we don't want the compiler to assume any
> > + * constness here as the value _does_ change along some code paths.
> > + */
> > +
> > +static int read_mpidr(void)
> > +{
> > +       unsigned int id;
> > +       asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
> > +       return id & MPIDR_HWID_BITMASK;
> > +}
> > +
> > +/*
> > + * bL switcher core code.
> > + */
> > +
> > +static void bL_do_switch(void *_unused)
> > +{
> > +       unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
> > +
> > +       /*
> > +        * We now have a piece of stack borrowed from the init task's.
> > +        * Let's also switch to init_mm right away to match it.
> > +        */
> > +       cpu_switch_mm(init_mm.pgd, &init_mm);
> 
> Is this cpu_switch_mm necessary? You are executing it to make sure you
> get to wfi with page tables that can't be deallocated by the running inbound
> CPU, if I got it right. It is not needed to use the init stack, but you know
> that.

This is also to ensure the active mm matches the value of the 'current' 
pointer.  Again this is going away in a subsequent patch when moving to 
dedicated kernel threads.

> > +
> > +       pr_debug("%s\n", __func__);
> > +
> > +       mpidr = read_mpidr();
> > +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       ob_cluster = clusterid;
> > +       ib_cluster = clusterid ^ 1;
> > +
> > +       /*
> > +        * Our state has been saved at this point.  Let's release our
> > +        * inbound CPU.
> > +        */
> > +       mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
> > +       sev();
> > +
> > +       /*
> > +        * From this point, we must assume that our counterpart CPU might
> > +        * have taken over in its parallel world already, as if execution
> > +        * just returned from cpu_suspend().  It is therefore important to
> > +        * be very careful not to make any change the other guy is not
> > +        * expecting.  This is why we need stack isolation.
> > +        *
> > +        * Fancy under cover tasks could be performed here.  For now
> > +        * we have none.
> > +        */
> > +
> > +       /* Let's put ourself down. */
> > +       mcpm_cpu_power_down();
> > +
> > +       /* should never get here */
> > +       BUG();
> > +}
> > +
> > +/*
> > + * Stack isolation.  To ensure 'current' remains valid, we just borrow
> > + * a slice of the init/idle task which should be fairly lightly used.
> > + * The borrowed area starts just above the thread_info structure located
> > + * at the very bottom of the stack, aligned to a cache line.
> > + */
> > +#define STACK_SIZE 256
> > +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
> > +static int bL_switchpoint(unsigned long _arg)
> > +{
> > +       unsigned int mpidr = read_mpidr();
> > +       unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER;
> > +       void *stack = &init_thread_info + 1;
> > +       stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
> > +       stack += cpu_index * STACK_SIZE + STACK_SIZE;
> 
> I do not like this much, for two reasons. Firstly because it might not work,
> secondly because whatever current usage we do from that point onwards
> is attached to the init thread, and I am not sure that's 100% safe either.
> 
> Moving to kernel threads suffers from the same issue, even though I
> should grant that there it is easier to monitor stack usage and to safely grab
> some starting at the bottom. At least the current pointer behaviour is kept
> coherent. Overall the risk of using current on both outbound and inbound
> CPUs at once with no coordination is a bit scary; probably it is best to
> prevent current pointer usage altogether (but this forces constraints on
> MCPM). I will give it more thought.

The only reason for keeping 'current' valid at the moment is for 
printk() to work. Otherwise random memory corruption does occur.

> > +       call_with_stack(bL_do_switch, (void *)_arg, stack);
> > +       BUG();
> > +}
> > +
> > +/*
> > + * Generic switcher interface
> > + */
> > +
> > +/*
> > + * bL_switch_to - Switch to a specific cluster for the current CPU
> > + * @new_cluster_id: the ID of the cluster to switch to.
> > + *
> > + * This function must be called on the CPU to be switched.
> > + * Returns 0 on success, else a negative status code.
> > + */
> > +static int bL_switch_to(unsigned int new_cluster_id)
> > +{
> > +       unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
> > +       int ret;
> > +
> > +       mpidr = read_mpidr();
> > +       cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> > +       clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> > +       ob_cluster = clusterid;
> > +       ib_cluster = clusterid ^ 1;
> > +
> > +       if (new_cluster_id == clusterid)
> > +               return 0;
> > +
> > +       pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
> > +
> > +       /* Close the gate for our entry vectors */
> > +       mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
> > +       mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
> > +
> > +       /*
> > +        * Let's wake up the inbound CPU now in case it requires some delay
> > +        * to come online, but leave it gated in our entry vector code.
> > +        */
> > +       ret = mcpm_cpu_power_up(cpuid, ib_cluster);
> > +       if (ret) {
> > +               pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
> > +               return ret;
> > +       }
> > +
> > +       /*
> > +        * From this point we are entering the switch critical zone
> > +        * and can't sleep/schedule anymore.
> > +        */
> > +       local_irq_disable();
> > +       local_fiq_disable();
> > +
> > +       this_cpu = smp_processor_id();
> > +
> > +       /* redirect GIC's SGIs to our counterpart */
> > +       gic_migrate_target(cpuid + ib_cluster*4);
> > +
> > +       /*
> > +        * Raise a SGI on the inbound CPU to make sure it doesn't stall
> > +        * in a possible WFI, such as in mcpm_power_down().
> > +        */
> > +       arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
> > +
> > +       ret = cpu_pm_enter();
> > +
> > +       /* we can not tolerate errors at this point */
> > +       if (ret)
> > +               panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
> > +
> > +       /*
> > +        * Flip the cluster in the CPU logical map for this CPU.
> > +        * This must be flushed to RAM as the resume code
> > +        * needs to access it while the caches are still disabled.
> > +        */
> > +       cpu_logical_map(this_cpu) ^= (1 << 8);
> > +       sync_cache_w(&cpu_logical_map(this_cpu));
> 
> sync_w is not needed anymore, cpu_resume does not use cpu_logical_map.

Good point.
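
So that hunk would simply keep the swizzle and drop the flush, i.e.
something like this (sketch only, not the actual revised patch):

	/* swizzle kept, sync_cache_w() dropped */
	cpu_logical_map(this_cpu) ^= (1 << 8);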


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-26 11:31           ` Lorenzo Pieralisi
@ 2013-07-26 14:41             ` Nicolas Pitre
  2013-07-26 15:34               ` Dave Martin
  0 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-26 14:41 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:

> On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> > On Thu, 25 Jul 2013, Dave Martin wrote:
> > 
> > > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > > 
> > > > > But this patch commits us to requiring that on the suspend path 
> > > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > > 
> > > > What comment would you suggest?  I want to make sure the possible 
> > > > confusion you see is properly addressed.
> > > 
> > > I think we just need to state that the value of
> > > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > > migration, cpu_logical_map() must be updated appropriately somewhere
> > > between cpu_pm_enter() and cpu_suspend().
> > 
> > Excellent.  I've amended the patch with this:
> > 
> > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > index 2835d35234..caf938db62 100644
> > --- a/arch/arm/kernel/suspend.c
> > +++ b/arch/arm/kernel/suspend.c
> > @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
> >  /*
> >   * Hide the first two arguments to __cpu_suspend - these are an implementation
> >   * detail which platform code shouldn't have to know about.
> > + *
> > + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> > + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> > + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> > + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
> >   */
> >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> >  {
> > 
> > I've put it against cpu_suspend() rather than __cpu_suspend(() as this 
> > is what users should care about.
> > 
> > ACK?
> 
> We need this patch to allow IKS to store a cpu context at a specific
> index, let's be honest. It is a moot point and I am not very happy
> about changing this code for a very specific usage, but the way code is
> implemented makes the change acceptable. I really do not think we should
> write guidelines on how cpu_suspend users have to change cpu_logical_map
> though, that's not needed apart from IKS and that should be limited to IKS
> code only.
> 
> Again, that's just my opinion, but cpu_suspend API must be kept as it is
> and we should not encourage people to use it in creative ways.

I tend to agree, but I'm now stuck between two conflicting requests.

Are you saying you are willing to give me your ACK if I revert the 
suggested change?


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-23  3:31 ` [PATCH 03/13] ARM: b.L: core switcher code Nicolas Pitre
  2013-07-25 17:15   ` Jonathan Austin
  2013-07-26 10:45   ` Lorenzo Pieralisi
@ 2013-07-26 14:53   ` Russell King - ARM Linux
  2013-07-26 15:10     ` Nicolas Pitre
  2 siblings, 1 reply; 43+ messages in thread
From: Russell King - ARM Linux @ 2013-07-26 14:53 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 22, 2013 at 11:31:19PM -0400, Nicolas Pitre wrote:
> To solve both issues, we borrow some of the task space belonging to
> the init/idle task which, by its nature, is lightly used and therefore
> is unlikely to clash with our usage.  The init task is also never going
> away.

That is... quite a horrid hack - and it is a hack.  You claim that
the idle task is not likely to clash, but with 8 CPUs each of
256 bytes of stack, that's 2k of stack.  With the thread data
structure at over 512 bytes, that's 2.5k of 8k.

Don't forget that the amount of kernel stack used depends on many
things - not just what the thread does (in the case of the idle
thread, that includes cpuidle stuff) but also the function call
depth of any interrupt in the system, as well as the path used to
get into the idle thread.

If you want to do this, it would be much better to use the bottom
of the stack space for the idle thread associated with the CPU.  The
idle threads are all forked at boot for any possible CPUs before the
secondaries are brought online, and so are persistent.  That would
reduce the additional required space to 256 bytes on the stack, not
2k on a single stack.

It also means that if we see larger clusters, we won't have this
in-built limitation caused by the size of the kernel stack.
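
Something along these lines, say (rough sketch only, untested; the helper
name is made up, and the 256-byte slice just mirrors what the patch uses):

#include <linux/sched.h>	/* idle_task(), task_thread_info() */
#include <linux/cache.h>	/* L1_CACHE_BYTES */
#include <linux/kernel.h>	/* PTR_ALIGN() */

#define SWITCHER_STACK_SIZE	256

/*
 * Carve the scratch area from the bottom of each CPU's own idle-thread
 * stack, just above its thread_info, instead of slicing init's stack.
 */
static void *switcher_stack(unsigned int cpu)
{
	struct thread_info *ti = task_thread_info(idle_task(cpu));
	void *base = PTR_ALIGN((void *)(ti + 1), L1_CACHE_BYTES);

	/* the borrowed stack grows down from the top of this slice */
	return base + SWITCHER_STACK_SIZE;
}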

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 03/13] ARM: b.L: core switcher code
  2013-07-26 14:53   ` Russell King - ARM Linux
@ 2013-07-26 15:10     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-26 15:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 26 Jul 2013, Russell King - ARM Linux wrote:

> On Mon, Jul 22, 2013 at 11:31:19PM -0400, Nicolas Pitre wrote:
> > To solve both issues, we borrow some of the task space belonging to
> > the init/idle task which, by its nature, is lightly used and therefore
> > is unlikely to clash with our usage.  The init task is also never going
> > away.
> 
> That is... quite a horrid hack - and it is a hack.  You claim that
> the idle task is not likely to clash, but with 8 CPUs each of
> 256 bytes of stack, that's 2k of stack.  With the thread data
> structure at over 512 bytes, that's 2.5k of 8k.

It is a hack indeed.  This way of doing things does not survive until 
the end of the patch series either.  But doing this for the actual first 
switcher patch makes the code much simpler to review.

> Don't forget that the amount of kernel stack used depends on many
> things - not just what the thread does (in the case of the idle
> thread, that includes cpuidle stuff) but also the function call
> depth of any interrupt in the system, as well as the path used to
> get into the idle thread.
> 
> If you want to do this, it would be much better to use the bottom
> of the stack space for the idle thread associated with the CPU.  The
> idle threads are all forked at boot for any possible CPUs before the
> secondaries are brought online, and so are persistent.  That would
> reduce the additional required space to 256 bytes on the stack, not
> 2k on a single stack.

What I do in a later patch is to use dedicated switcher threads per 
logical CPU, and the separate stack is taken from each thread's stack.  
That also means that only 2 special stack areas are needed instead of 8 
or more.

But kernel threads are introduced as a separate patch to ease review.


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues
  2013-07-23  3:31 ` [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues Nicolas Pitre
@ 2013-07-26 15:18   ` Lorenzo Pieralisi
  2013-07-26 15:39     ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-26 15:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 23, 2013 at 04:31:21AM +0100, Nicolas Pitre wrote:
> The workqueues are problematic as they may be contended.
> They can't be scheduled with top priority either.  Also the optimization
> in bL_switch_request() to skip the workqueue entirely when the target CPU
> and the calling CPU were the same didn't allow for bL_switch_request() to
> be called from atomic context, as might be the case for some cpufreq
> drivers.
> 
> Let's move to dedicated kthreads instead.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/common/bL_switcher.c      | 101 ++++++++++++++++++++++++++++---------
>  arch/arm/include/asm/bL_switcher.h |   2 +-
>  2 files changed, 79 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> index d6f7e507d9..c2355cafc9 100644
> --- a/arch/arm/common/bL_switcher.c
> +++ b/arch/arm/common/bL_switcher.c
> @@ -15,8 +15,10 @@
>  #include <linux/sched.h>
>  #include <linux/interrupt.h>
>  #include <linux/cpu_pm.h>
> +#include <linux/cpu.h>
>  #include <linux/cpumask.h>
> -#include <linux/workqueue.h>
> +#include <linux/kthread.h>
> +#include <linux/wait.h>
>  #include <linux/clockchips.h>
>  #include <linux/hrtimer.h>
>  #include <linux/tick.h>
> @@ -225,15 +227,48 @@ static int bL_switch_to(unsigned int new_cluster_id)
>  	return ret;
>  }
>  
> -struct switch_args {
> -	unsigned int cluster;
> -	struct work_struct work;
> +struct bL_thread {
> +	struct task_struct *task;
> +	wait_queue_head_t wq;
> +	int wanted_cluster;
>  };
>  
> -static void __bL_switch_to(struct work_struct *work)
> +static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
> +
> +static int bL_switcher_thread(void *arg)
> +{
> +	struct bL_thread *t = arg;
> +	struct sched_param param = { .sched_priority = 1 };
> +	int cluster;
> +
> +	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
> +
> +	do {
> +		if (signal_pending(current))
> +			flush_signals(current);
> +		wait_event_interruptible(t->wq,
> +				t->wanted_cluster != -1 ||
> +				kthread_should_stop());
> +		cluster = xchg(&t->wanted_cluster, -1);
> +		if (cluster != -1)
> +			bL_switch_to(cluster);
> +	} while (!kthread_should_stop());
> +
> +	return 0;
> +}
> +
> +static struct task_struct * __init bL_switcher_thread_create(int cpu, void *arg)
>  {
> -	struct switch_args *args = container_of(work, struct switch_args, work);
> -	bL_switch_to(args->cluster);
> +	struct task_struct *task;
> +
> +	task = kthread_create_on_node(bL_switcher_thread, arg,
> +				      cpu_to_node(cpu), "kswitcher_%d", cpu);
> +	if (!IS_ERR(task)) {
> +		kthread_bind(task, cpu);
> +		wake_up_process(task);
> +	} else
> +		pr_err("%s failed for CPU %d\n", __func__, cpu);
> +	return task;
>  }
>  
>  /*
> @@ -242,26 +277,46 @@ static void __bL_switch_to(struct work_struct *work)
>   * @cpu: the CPU to switch
>   * @new_cluster_id: the ID of the cluster to switch to.
>   *
> - * This function causes a cluster switch on the given CPU.  If the given
> - * CPU is the same as the calling CPU then the switch happens right away.
> - * Otherwise the request is put on a work queue to be scheduled on the
> - * remote CPU.
> + * This function causes a cluster switch on the given CPU by waking up
> + * the appropriate switcher thread.  This function may or may not return
> + * before the switch has occurred.
>   */
> -void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
>  {
> -	unsigned int this_cpu = get_cpu();
> -	struct switch_args args;
> +	struct bL_thread *t;
>  
> -	if (cpu == this_cpu) {
> -		bL_switch_to(new_cluster_id);
> -		put_cpu();
> -		return;
> +	if (cpu >= MAX_CPUS_PER_CLUSTER) {
> +		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
> +		return -EINVAL;
>  	}
> -	put_cpu();
>  
> -	args.cluster = new_cluster_id;
> -	INIT_WORK_ONSTACK(&args.work, __bL_switch_to);
> -	schedule_work_on(cpu, &args.work);
> -	flush_work(&args.work);
> +	t = &bL_threads[cpu];
> +	if (IS_ERR(t->task))
> +		return PTR_ERR(t->task);
> +	if (!t->task)
> +		return -ESRCH;
> +
> +	t->wanted_cluster = new_cluster_id;
> +	wake_up(&t->wq);
> +	return 0;
>  }
>  EXPORT_SYMBOL_GPL(bL_switch_request);
> +
> +static int __init bL_switcher_init(void)
> +{
> +	int cpu;
> +
> +	pr_info("big.LITTLE switcher initializing\n");
> +
> +	for_each_online_cpu(cpu) {
> +		struct bL_thread *t = &bL_threads[cpu];
> +		init_waitqueue_head(&t->wq);

Dereferencing &bL_threads[cpu] is wrong without checking the cpu index against
MAX_CPUS_PER_CLUSTER. I know you hotplug out half of the CPUs in a later patch but
still, this code needs fixing.

> +		t->wanted_cluster = -1;
> +		t->task = bL_switcher_thread_create(cpu, t);
> +	}
> +
> +	pr_info("big.LITTLE switcher initialized\n");
> +	return 0;
> +}
> +
> +late_initcall(bL_switcher_init);
> diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
> index 72efe3f349..e0c0bba70b 100644
> --- a/arch/arm/include/asm/bL_switcher.h
> +++ b/arch/arm/include/asm/bL_switcher.h
> @@ -12,6 +12,6 @@
>  #ifndef ASM_BL_SWITCHER_H
>  #define ASM_BL_SWITCHER_H
>  
> -void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
> +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);

We probably discussed this before, and it is not strictly related to
this patch, but is locking/queuing necessary when requesting a switch ? I do
not remember the outcome of the discussion so I am asking again.

I will go through this patch in details later.

Lorenzo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-26 14:41             ` Nicolas Pitre
@ 2013-07-26 15:34               ` Dave Martin
  2013-07-26 15:43                 ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Dave Martin @ 2013-07-26 15:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 26, 2013 at 10:41:38AM -0400, Nicolas Pitre wrote:
> On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:
> 
> > On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> > > On Thu, 25 Jul 2013, Dave Martin wrote:
> > > 
> > > > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > > > 
> > > > > > But this patch commits us to requiring that on the suspend path 
> > > > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > > > 
> > > > > What comment would you suggest?  I want to make sure the possible 
> > > > > confusion you see is properly addressed.
> > > > 
> > > > I think we just need to state that the value of
> > > > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > > > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > > > migration, cpu_logical_map() must be updated appropriately somewhere
> > > > between cpu_pm_enter() and cpu_suspend().
> > > 
> > > Excellent.  I've amended the patch with this:
> > > 
> > > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > > index 2835d35234..caf938db62 100644
> > > --- a/arch/arm/kernel/suspend.c
> > > +++ b/arch/arm/kernel/suspend.c
> > > @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
> > >  /*
> > >   * Hide the first two arguments to __cpu_suspend - these are an implementation
> > >   * detail which platform code shouldn't have to know about.
> > > + *
> > > + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> > > + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> > > + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> > > + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
> > >   */
> > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > >  {
> > > 
> > > I've put it against cpu_suspend() rather than __cpu_suspend(() as this 
> > > is what users should care about.
> > > 
> > > ACK?
> > 
> > We need this patch to allow IKS to store a cpu context at a specific
> > index, let's be honest. It is a moot point and I am not very happy
> > about changing this code for a very specific usage, but the way code is
> > implemented makes the change acceptable. I really do not think we should
> > write guidelines on how cpu_suspend users have to change cpu_logical_map
> > though, that's not needed apart from IKS and that should be limited to IKS
> > code only.
> > 
> > Again, that's just my opinion, but cpu_suspend API must be kept as it is
> > and we should not encourage people to use it in creative ways.
> 
> I tend to agree, but I'm now stuck between two conflicting requests.

Would it make sense to keep the same API to cpu_suspend(), but make it
a wrapper for another function which has the MPIDR argument?  Then people
calling cpu_suspend() continue as normal.  Only IKS needs to know about
the underlying MPIDR handling when calling this.

Cheers
---Dave

> 
> Are you saying you are willing to give me your ACK if I revert the 
> suggested change?
> 
> 
> Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues
  2013-07-26 15:18   ` Lorenzo Pieralisi
@ 2013-07-26 15:39     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-26 15:39 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:

> On Tue, Jul 23, 2013 at 04:31:21AM +0100, Nicolas Pitre wrote:
> > The workqueues are problematic as they may be contended.
> > They can't be scheduled with top priority either.  Also the optimization
> > in bL_switch_request() to skip the workqueue entirely when the target CPU
> > and the calling CPU were the same didn't allow for bL_switch_request() to
> > be called from atomic context, as might be the case for some cpufreq
> > drivers.
> > 
> > Let's move to dedicated kthreads instead.
> > 
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> >  arch/arm/common/bL_switcher.c      | 101 ++++++++++++++++++++++++++++---------
> >  arch/arm/include/asm/bL_switcher.h |   2 +-
> >  2 files changed, 79 insertions(+), 24 deletions(-)
> > 
> > diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> > index d6f7e507d9..c2355cafc9 100644
> > --- a/arch/arm/common/bL_switcher.c
> > +++ b/arch/arm/common/bL_switcher.c
> > @@ -15,8 +15,10 @@
> >  #include <linux/sched.h>
> >  #include <linux/interrupt.h>
> >  #include <linux/cpu_pm.h>
> > +#include <linux/cpu.h>
> >  #include <linux/cpumask.h>
> > -#include <linux/workqueue.h>
> > +#include <linux/kthread.h>
> > +#include <linux/wait.h>
> >  #include <linux/clockchips.h>
> >  #include <linux/hrtimer.h>
> >  #include <linux/tick.h>
> > @@ -225,15 +227,48 @@ static int bL_switch_to(unsigned int new_cluster_id)
> >  	return ret;
> >  }
> >  
> > -struct switch_args {
> > -	unsigned int cluster;
> > -	struct work_struct work;
> > +struct bL_thread {
> > +	struct task_struct *task;
> > +	wait_queue_head_t wq;
> > +	int wanted_cluster;
> >  };
> >  
> > -static void __bL_switch_to(struct work_struct *work)
> > +static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
> > +
> > +static int bL_switcher_thread(void *arg)
> > +{
> > +	struct bL_thread *t = arg;
> > +	struct sched_param param = { .sched_priority = 1 };
> > +	int cluster;
> > +
> > +	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
> > +
> > +	do {
> > +		if (signal_pending(current))
> > +			flush_signals(current);
> > +		wait_event_interruptible(t->wq,
> > +				t->wanted_cluster != -1 ||
> > +				kthread_should_stop());
> > +		cluster = xchg(&t->wanted_cluster, -1);
> > +		if (cluster != -1)
> > +			bL_switch_to(cluster);
> > +	} while (!kthread_should_stop());
> > +
> > +	return 0;
> > +}
> > +
> > +static struct task_struct * __init bL_switcher_thread_create(int cpu, void *arg)
> >  {
> > -	struct switch_args *args = container_of(work, struct switch_args, work);
> > -	bL_switch_to(args->cluster);
> > +	struct task_struct *task;
> > +
> > +	task = kthread_create_on_node(bL_switcher_thread, arg,
> > +				      cpu_to_node(cpu), "kswitcher_%d", cpu);
> > +	if (!IS_ERR(task)) {
> > +		kthread_bind(task, cpu);
> > +		wake_up_process(task);
> > +	} else
> > +		pr_err("%s failed for CPU %d\n", __func__, cpu);
> > +	return task;
> >  }
> >  
> >  /*
> > @@ -242,26 +277,46 @@ static void __bL_switch_to(struct work_struct *work)
> >   * @cpu: the CPU to switch
> >   * @new_cluster_id: the ID of the cluster to switch to.
> >   *
> > - * This function causes a cluster switch on the given CPU.  If the given
> > - * CPU is the same as the calling CPU then the switch happens right away.
> > - * Otherwise the request is put on a work queue to be scheduled on the
> > - * remote CPU.
> > + * This function causes a cluster switch on the given CPU by waking up
> > + * the appropriate switcher thread.  This function may or may not return
> > + * before the switch has occurred.
> >   */
> > -void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> > +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
> >  {
> > -	unsigned int this_cpu = get_cpu();
> > -	struct switch_args args;
> > +	struct bL_thread *t;
> >  
> > -	if (cpu == this_cpu) {
> > -		bL_switch_to(new_cluster_id);
> > -		put_cpu();
> > -		return;
> > +	if (cpu >= MAX_CPUS_PER_CLUSTER) {
> > +		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
> > +		return -EINVAL;
> >  	}
> > -	put_cpu();
> >  
> > -	args.cluster = new_cluster_id;
> > -	INIT_WORK_ONSTACK(&args.work, __bL_switch_to);
> > -	schedule_work_on(cpu, &args.work);
> > -	flush_work(&args.work);
> > +	t = &bL_threads[cpu];
> > +	if (IS_ERR(t->task))
> > +		return PTR_ERR(t->task);
> > +	if (!t->task)
> > +		return -ESRCH;
> > +
> > +	t->wanted_cluster = new_cluster_id;
> > +	wake_up(&t->wq);
> > +	return 0;
> >  }
> >  EXPORT_SYMBOL_GPL(bL_switch_request);
> > +
> > +static int __init bL_switcher_init(void)
> > +{
> > +	int cpu;
> > +
> > +	pr_info("big.LITTLE switcher initializing\n");
> > +
> > +	for_each_online_cpu(cpu) {
> > +		struct bL_thread *t = &bL_threads[cpu];
> > +		init_waitqueue_head(&t->wq);
> 
> Dereferencing &bL_threads[cpu] is wrong without checking the cpu index against
> MAX_CPUS_PER_CLUSTER. I know you hotplug out half of the CPUs in a later patch but
> still, this code needs fixing.

I've now declared bL_threads[] with NR_CPUS instead.  A later patch 
needed that to happen anyway.
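
I.e. (sketch of the change, not the actual updated patch):

-static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
+static struct bL_thread bL_threads[NR_CPUS];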

> > +		t->wanted_cluster = -1;
> > +		t->task = bL_switcher_thread_create(cpu, t);
> > +	}
> > +
> > +	pr_info("big.LITTLE switcher initialized\n");
> > +	return 0;
> > +}
> > +
> > +late_initcall(bL_switcher_init);
> > diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
> > index 72efe3f349..e0c0bba70b 100644
> > --- a/arch/arm/include/asm/bL_switcher.h
> > +++ b/arch/arm/include/asm/bL_switcher.h
> > @@ -12,6 +12,6 @@
> >  #ifndef ASM_BL_SWITCHER_H
> >  #define ASM_BL_SWITCHER_H
> >  
> > -void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
> > +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
> 
> We probably discussed this before, and it is not strictly related to
> this patch, but is locking/queuing necessary when requesting a switch ? I do
> not remember the outcome of the discussion so I am asking again.

Strictly speaking, there is no queueing/locking needed as we currently 
only request a state.  If we request the big cluster, and another 
request comes along for the little cluster in the mean time before the 
request to the big cluster was acted upon, then there is effectively 
nothing to do anymore and the fact that the previous request was 
overwritten is fine.

There is a patch introducing some kind of queueing in part 2 of the 
whole series that I didn't post yet.  The only reason we need that is to 
call the cpufreq post notifier only after the switch is actually 
complete.  I'll post that along with the patches connecting cpufreq with 
this eventually.


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-26 15:34               ` Dave Martin
@ 2013-07-26 15:43                 ` Nicolas Pitre
  2013-07-26 15:45                   ` Dave Martin
  2013-07-26 17:04                   ` Lorenzo Pieralisi
  0 siblings, 2 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-26 15:43 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 26 Jul 2013, Dave Martin wrote:

> On Fri, Jul 26, 2013 at 10:41:38AM -0400, Nicolas Pitre wrote:
> > On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:
> > 
> > > On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> > > > On Thu, 25 Jul 2013, Dave Martin wrote:
> > > > 
> > > > > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > > > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > > > > 
> > > > > > > But this patch commits us to requiring that on the suspend path 
> > > > > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > > > > 
> > > > > > What comment would you suggest?  I want to make sure the possible 
> > > > > > confusion you see is properly addressed.
> > > > > 
> > > > > I think we just need to state that the value of
> > > > > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > > > > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > > > > migration, cpu_logical_map() must be updated appropriately somewhere
> > > > > between cpu_pm_enter() and cpu_suspend().
> > > > 
> > > > Excellent.  I've amended the patch with this:
> > > > 
> > > > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > > > index 2835d35234..caf938db62 100644
> > > > --- a/arch/arm/kernel/suspend.c
> > > > +++ b/arch/arm/kernel/suspend.c
> > > > @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
> > > >  /*
> > > >   * Hide the first two arguments to __cpu_suspend - these are an implementation
> > > >   * detail which platform code shouldn't have to know about.
> > > > + *
> > > > + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> > > > + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> > > > + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> > > > + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
> > > >   */
> > > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > > >  {
> > > > 
> > > > I've put it against cpu_suspend() rather than __cpu_suspend(() as this 
> > > > is what users should care about.
> > > > 
> > > > ACK?
> > > 
> > > We need this patch to allow IKS to store a cpu context at a specific
> > > index, let's be honest. It is a moot point and I am not very happy
> > > about changing this code for a very specific usage, but the way code is
> > > implemented makes the change acceptable. I really do not think we should
> > > write guidelines on how cpu_suspend users have to change cpu_logical_map
> > > though, that's not needed apart from IKS and that should be limited to IKS
> > > code only.
> > > 
> > > Again, that's just my opinion, but cpu_suspend API must be kept as it is
> > > and we should not encourage people to use it in creative ways.
> > 
> > I tend to agree, but I'm now stuck between two conflicting requests.
> 
> Would it make sense to keep the same API to cpu_suspend(), but make it
> a wrapper for another function which has the MPIDR argument?  Then people
> calling cpu_suspend() continue as normal.  Only IKS needs to know about
> the underlying MPIDR handling when calling this.

The fact is that the switcher _does_ need to swizzle the cpu_logical_map 
anyway before suspending.  Hence really there is no point creating extra
wrappers for this.

So Lorenzo's suggestion to simply not advertise this too much for people 
to not get too creative is probably the best compromise IMHO.


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-26 15:43                 ` Nicolas Pitre
@ 2013-07-26 15:45                   ` Dave Martin
  2013-07-26 17:04                   ` Lorenzo Pieralisi
  1 sibling, 0 replies; 43+ messages in thread
From: Dave Martin @ 2013-07-26 15:45 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 26, 2013 at 11:43:34AM -0400, Nicolas Pitre wrote:
> On Fri, 26 Jul 2013, Dave Martin wrote:
> 
> > On Fri, Jul 26, 2013 at 10:41:38AM -0400, Nicolas Pitre wrote:
> > > On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:
> > > 
> > > > On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> > > > > On Thu, 25 Jul 2013, Dave Martin wrote:
> > > > > 
> > > > > > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > > > > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > > > > > 
> > > > > > > > But this patch commits us to requiring that on the suspend path 
> > > > > > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > > > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > > > > > 
> > > > > > > What comment would you suggest?  I want to make sure the possible 
> > > > > > > confusion you see is properly addressed.
> > > > > > 
> > > > > > I think we just need to state that the value of
> > > > > > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > > > > > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > > > > > migration, cpu_logical_map() must be updated appropriately somewhere
> > > > > > between cpu_pm_enter() and cpu_suspend().
> > > > > 
> > > > > Excellent.  I've amended the patch with this:
> > > > > 
> > > > > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > > > > index 2835d35234..caf938db62 100644
> > > > > --- a/arch/arm/kernel/suspend.c
> > > > > +++ b/arch/arm/kernel/suspend.c
> > > > > @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
> > > > >  /*
> > > > >   * Hide the first two arguments to __cpu_suspend - these are an implementation
> > > > >   * detail which platform code shouldn't have to know about.
> > > > > + *
> > > > > + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> > > > > + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> > > > > + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> > > > > + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
> > > > >   */
> > > > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > > > >  {
> > > > > 
> > > > > I've put it against cpu_suspend() rather than __cpu_suspend(() as this 
> > > > > is what users should care about.
> > > > > 
> > > > > ACK?
> > > > 
> > > > We need this patch to allow IKS to store a cpu context at a specific
> > > > index, let's be honest. It is a moot point and I am not very happy
> > > > about changing this code for a very specific usage, but the way code is
> > > > implemented makes the change acceptable. I really do not think we should
> > > > write guidelines on how cpu_suspend users have to change cpu_logical_map
> > > > though, that's not needed apart from IKS and that should be limited to IKS
> > > > code only.
> > > > 
> > > > Again, that's just my opinion, but cpu_suspend API must be kept as it is
> > > > and we should not encourage people to use it in creative ways.
> > > 
> > > I tend to agree, but I'm now stuck between two conflicting requests.
> > 
> > Would it make sense to keep the same API to cpu_suspend(), but make it
> > a wrapper for another function which has the MPIDR argument?  Then people
> > calling cpu_suspend() continue as normal.  Only IKS needs to know about
> > the underlying MPIDR handling when calling this.
> 
> The fact is that the switcher _does_ need to swizzle the cpu_logical_map 
> anyway before suspending.  Hence really there is no point creating extra
> wrappers for this.
> 
> So Lorenzo's suggestion to simply not advertise this too much for people 
> to not get too creative is probably the best compromise IMHO.

Sure, I don't have a problem with that in light of the discussion.

Cheers
---Dave

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-26 15:43                 ` Nicolas Pitre
  2013-07-26 15:45                   ` Dave Martin
@ 2013-07-26 17:04                   ` Lorenzo Pieralisi
  2013-07-26 19:19                     ` Nicolas Pitre
  1 sibling, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-26 17:04 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 26, 2013 at 04:43:34PM +0100, Nicolas Pitre wrote:
> On Fri, 26 Jul 2013, Dave Martin wrote:
> 
> > On Fri, Jul 26, 2013 at 10:41:38AM -0400, Nicolas Pitre wrote:
> > > On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:
> > > 
> > > > On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> > > > > On Thu, 25 Jul 2013, Dave Martin wrote:
> > > > > 
> > > > > > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > > > > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > > > > > 
> > > > > > > > But this patch commits us to requiring that on the suspend path 
> > > > > > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > > > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > > > > > 
> > > > > > > What comment would you suggest?  I want to make sure the possible 
> > > > > > > confusion you see is properly addressed.
> > > > > > 
> > > > > > I think we just need to state that the value of
> > > > > > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > > > > > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > > > > > migration, cpu_logical_map() must be updated appropriately somewhere
> > > > > > between cpu_pm_enter() and cpu_suspend().
> > > > > 
> > > > > Excellent.  I've amended the patch with this:
> > > > > 
> > > > > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > > > > index 2835d35234..caf938db62 100644
> > > > > --- a/arch/arm/kernel/suspend.c
> > > > > +++ b/arch/arm/kernel/suspend.c
> > > > > @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
> > > > >  /*
> > > > >   * Hide the first two arguments to __cpu_suspend - these are an implementation
> > > > >   * detail which platform code shouldn't have to know about.
> > > > > + *
> > > > > + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> > > > > + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> > > > > + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> > > > > + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
> > > > >   */
> > > > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > > > >  {
> > > > > 
> > > > > I've put it against cpu_suspend() rather than __cpu_suspend(() as this 
> > > > > is what users should care about.
> > > > > 
> > > > > ACK?
> > > > 
> > > > We need this patch to allow IKS to store a cpu context at a specific
> > > > index, let's be honest. It is a moot point and I am not very happy
> > > > about changing this code for a very specific usage, but the way code is
> > > > implemented makes the change acceptable. I really do not think we should
> > > > write guidelines on how cpu_suspend users have to change cpu_logical_map
> > > > though, that's not needed apart from IKS and that should be limited to IKS
> > > > code only.
> > > > 
> > > > Again, that's just my opinion, but cpu_suspend API must be kept as it is
> > > > and we should not encourage people to use it in creative ways.
> > > 
> > > I tend to agree, but I'm now stuck between two conflicting requests.
> > 
> > Would it make sense to keep the same API to cpu_suspend(), but make it
> > a wrapper for another function which has the MPIDR argument?  Then people
> > calling cpu_suspend() continue as normal.  Only IKS needs to know about
> > the underlying MPIDR handling when calling this.
> 
> The fact is that the switcher _does_ need to swizzle the cpu_logical_map 
> anyway before suspending.  Hence really there is no point creating extra
> wrappers for this.
> 
> So Lorenzo's suggestion to simply not advertise this too much for people 
> to not get too creative is probably the best compromise IMHO.

We could add a function cpu_migrate, almost identical to cpu_suspend
but accepting an MPIDR parameter as Dave said. cpu_suspend would read its
own MPIDR using read_cpuid_mpidr() and pass it to __cpu_suspend, keeping the
current behaviour unchanged (well, MPIDR is now read in cpu_suspend instead
of __cpu_suspend).
cpu_migrate will pass the mpidr to __cpu_suspend, to say "migrate this CPU to
the CPU identified by this MPIDR"; IKS can swizzle cpu_logical_map internally.
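
Roughly (just a sketch of the idea, untested; the page-table restore details
of the real cpu_suspend() are left out, and cpu_migrate is only a proposed
name):

int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
	/* behaviour unchanged: suspend and resume on the same physical CPU */
	return __cpu_suspend(arg, fn, read_cpuid_mpidr());
}

int cpu_migrate(unsigned long arg, int (*fn)(unsigned long), u32 mpidr)
{
	/* resume on the physical CPU identified by @mpidr */
	return __cpu_suspend(arg, fn, mpidr);
}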

Looks like a lot of churn though, probably the intent is clearer and closer to
PSCI. Just an idea, probably useless.

I will apply and test the patch as it is first thing next week.

Lorenzo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-26 17:04                   ` Lorenzo Pieralisi
@ 2013-07-26 19:19                     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-26 19:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:

> On Fri, Jul 26, 2013 at 04:43:34PM +0100, Nicolas Pitre wrote:
> > On Fri, 26 Jul 2013, Dave Martin wrote:
> > 
> > > On Fri, Jul 26, 2013 at 10:41:38AM -0400, Nicolas Pitre wrote:
> > > > On Fri, 26 Jul 2013, Lorenzo Pieralisi wrote:
> > > > 
> > > > > On Thu, Jul 25, 2013 at 05:06:51PM +0100, Nicolas Pitre wrote:
> > > > > > On Thu, 25 Jul 2013, Dave Martin wrote:
> > > > > > 
> > > > > > > On Wed, Jul 24, 2013 at 02:55:00PM -0400, Nicolas Pitre wrote:
> > > > > > > > On Wed, 24 Jul 2013, Dave Martin wrote:
> > > > > > > > 
> > > > > > > > > But this patch commits us to requiring that on the suspend path 
> > > > > > > > > specifically -- I think that ought to be mentioned somewhere. A 
> > > > > > > > > comment in the preamble for __cpu_suspend would be enough, I think.
> > > > > > > > 
> > > > > > > > What comment would you suggest?  I want to make sure the possible 
> > > > > > > > confusion you see is properly addressed.
> > > > > > > 
> > > > > > > I think we just need to state that the value of
> > > > > > > cpu_logical_map(smp_processor_id()) must be the MPIDR of the physical
> > > > > > > CPU the suspending logical CPU will resume on.  Consequently, if doing a
> > > > > > > migration, cpu_logical_map() must be updated appropriately somewhere
> > > > > > > between cpu_pm_enter() and cpu_suspend().
> > > > > > 
> > > > > > Excellent.  I've amended the patch with this:
> > > > > > 
> > > > > > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > > > > > index 2835d35234..caf938db62 100644
> > > > > > --- a/arch/arm/kernel/suspend.c
> > > > > > +++ b/arch/arm/kernel/suspend.c
> > > > > > @@ -17,6 +17,11 @@ extern void cpu_resume_mmu(void);
> > > > > >  /*
> > > > > >   * Hide the first two arguments to __cpu_suspend - these are an implementation
> > > > > >   * detail which platform code shouldn't have to know about.
> > > > > > + *
> > > > > > + * On SMP systems, the value of cpu_logical_map(smp_processor_id()) must be
> > > > > > + * the MPIDR of the physical CPU the suspending logical CPU will resume on.
> > > > > > + * Consequently, if doing a physical CPU migration, cpu_logical_map() must be
> > > > > > + * updated appropriately somewhere between cpu_pm_enter() and cpu_suspend().
> > > > > >   */
> > > > > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > > > > >  {
> > > > > > 
> > > > > > I've put it against cpu_suspend() rather than __cpu_suspend(() as this 
> > > > > > is what users should care about.
> > > > > > 
> > > > > > ACK?
> > > > > 
> > > > > We need this patch to allow IKS to store a cpu context at a specific
> > > > > index, let's be honest. It is a moot point and I am not very happy
> > > > > about changing this code for a very specific usage, but the way code is
> > > > > implemented makes the change acceptable. I really do not think we should
> > > > > write guidelines on how cpu_suspend users have to change cpu_logical_map
> > > > > though, that's not needed apart from IKS and that should be limited to IKS
> > > > > code only.
> > > > > 
> > > > > Again, that's just my opinion, but cpu_suspend API must be kept as it is
> > > > > and we should not encourage people to use it in creative ways.
> > > > 
> > > > I tend to agree, but I'm now stuck between two conflicting requests.
> > > 
> > > Would it make sense to keep the same API to cpu_suspend(), but make it
> > > a wrapper for another function which has the MPIDR argument?  Then people
> > > calling cpu_suspend() continue as normal.  Only IKS needs to know about
> > > the underlying MPIDR handling when calling this.
> > 
> > The fact is that the switcher _does_ need to swizzle the cpu_logical_map 
> > anyway before suspending.  Hence really there is no point creating extra
> > wrappers for this.
> > 
> > So Lorenzo's suggestion to simply not advertise this too much for people 
> > to not get too creative is probably the best compromise IMHO.
> 
> We could add a function cpu_migrate, almost identical to cpu_suspend
> but accepting an MPIDR parameter as Dave said. cpu_suspend would read its
> own MPIDR using read_cpuid_mpidr() and pass it to __cpu_suspend, keeping the
> current behaviour unchanged (well, MPIDR is now read in cpu_suspend instead
> of __cpu_suspend).
> cpu_migrate will pass the mpidr to __cpu_suspend, to say "migrate this CPU to
> the CPU identified by this MPIDR"; IKS can swizzle cpu_logical_map internally.

Sorry but aren't we getting overboard with this?

- IKS _has_ to swizzle cpu_logical_map

- The suspend path has to get the MPIDR somehow

- This patch keeps the change to a minimum

- We don't necessarily want to "publish" a formal interface for this 
  trick

> Looks like a lot of churn though, probably the intent is clearer and closer to
> PSCI. Just an idea, probably useless.

PSCI has to be invoked at a different level.  And frankly we don't need 
to look for analogies to justify an extra interface which we may as well 
dispense with.

> I will apply and test the patch as it is first thing next week.

Thanks.

FYI this patch is already in Linaro's kernel and passed all our testing.


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-23  3:31 ` [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array Nicolas Pitre
  2013-07-24 16:47   ` Dave Martin
@ 2013-07-29 11:50   ` Lorenzo Pieralisi
  2013-07-30  2:08     ` Nicolas Pitre
  1 sibling, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-29 11:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 23, 2013 at 04:31:17AM +0100, Nicolas Pitre wrote:
> Currently we hash the MPIDR of the CPU being suspended to determine which
> entry in the sleep_save_sp array to use. In some situations, such as when
> we want to resume on another physical CPU, the MPIDR of another CPU should
> be used instead.
> 
> So let's use the value of cpu_logical_map(smp_processor_id()) in place
> of the MPIDR in the suspend path.  This will result in the same index
> being used as with the previous code unless the caller has modified
> cpu_logical_map() beforehand.

I will rely on your wisdom to rewrite the commit log in a way that
does not hint at dangerous creativity, if you catch my drift :D

> The register allocation in __cpu_suspend is reworked in order to better
> accommodate the additional argument.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/kernel/sleep.S   | 25 +++++++++++--------------
>  arch/arm/kernel/suspend.c |  8 +++++---
>  2 files changed, 16 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
> index db1536b8b3..836d10e698 100644
> --- a/arch/arm/kernel/sleep.S
> +++ b/arch/arm/kernel/sleep.S
> @@ -55,6 +55,7 @@
>   * specific registers and some other data for resume.
>   *  r0 = suspend function arg0
>   *  r1 = suspend function
> + *  r2 = MPIDR value used to index into sleep_save_sp

CPU's MPIDR or something like that, let's not hint at possible creative usage.

>   */
>  ENTRY(__cpu_suspend)
>  	stmfd	sp!, {r4 - r11, lr}
> @@ -67,23 +68,19 @@ ENTRY(__cpu_suspend)
>  	mov	r5, sp			@ current virtual SP
>  	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
>  	sub	sp, sp, r4		@ allocate CPU state on stack
> -	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
> -	add	r0, sp, #8		@ save pointer to save block
> -	mov	r1, r4			@ size of save block
> -	mov	r2, r5			@ virtual SP
>  	ldr	r3, =sleep_save_sp
> +	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
>  	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
> -	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
> -        ALT_UP_B(1f)
> -	ldr	r8, =mpidr_hash
> -	/*
> -	 * This ldmia relies on the memory layout of the mpidr_hash
> -	 * struct mpidr_hash.
> -	 */
> -	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
> -	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
> -	add	r3, r3, lr, lsl #2
> +	ALT_SMP(ldr r0, =mpidr_hash)
> +       ALT_UP_B(1f)

Should be a tab, do not know if that's an email server issue or not.

> +	/* This ldmia relies on the memory layout of the mpidr_hash struct */
> +	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
> +	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
> +	add	r3, r3, r0, lsl #2
>  1:
> +	mov	r2, r5			@ virtual SP
> +	mov	r1, r4			@ size of save block
> +	add	r0, sp, #8		@ pointer to save block

It is already a bit complex to follow, code is ok, but line above was
better placed closer to sp last change, not after hashing the MPIDR.

>  	bl	__cpu_suspend_save
>  	adr	lr, BSYM(cpu_suspend_abort)
>  	ldmfd	sp!, {r0, pc}		@ call suspend fn
> diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> index 41cf3cbf75..2835d35234 100644
> --- a/arch/arm/kernel/suspend.c
> +++ b/arch/arm/kernel/suspend.c
> @@ -10,7 +10,7 @@
>  #include <asm/suspend.h>
>  #include <asm/tlbflush.h>
>  
> -extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
> +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
>  extern void cpu_resume_mmu(void);
>  
>  #ifdef CONFIG_MMU
> @@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void);
>  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  {
>  	struct mm_struct *mm = current->active_mm;
> +	u32 __mpidr = cpu_logical_map(smp_processor_id());
>  	int ret;
>  
>  	if (!idmap_pgd)
> @@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  	 * resume (indicated by a zero return code), we need to switch
>  	 * back to the correct page tables.
>  	 */
> -	ret = __cpu_suspend(arg, fn);
> +	ret = __cpu_suspend(arg, fn, __mpidr);
>  	if (ret == 0) {
>  		cpu_switch_mm(mm->pgd, mm);
>  		local_flush_bp_all();
> @@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  #else
>  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
>  {
> -	return __cpu_suspend(arg, fn);
> +	u32 __mpidr = cpu_logical_map(smp_processor_id());
> +	return __cpu_suspend(arg, fn, __mpidr);
>  }
>  #define	idmap_pgd	NULL
>  #endif

Apart from these nits, let's hope it is the last cpu_suspend bit we need to
change, please land it on -next asap, of course if Russell is ok with that.

I tested it on TC2.

FWIW:

Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-29 11:50   ` Lorenzo Pieralisi
@ 2013-07-30  2:08     ` Nicolas Pitre
  2013-07-30  9:15       ` Dave Martin
  0 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-30  2:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 29 Jul 2013, Lorenzo Pieralisi wrote:

> On Tue, Jul 23, 2013 at 04:31:17AM +0100, Nicolas Pitre wrote:
> > Currently we hash the MPIDR of the CPU being suspended to determine which
> > entry in the sleep_save_sp array to use. In some situations, such as when
> > we want to resume on another physical CPU, the MPIDR of another CPU should
> > be used instead.
> > 
> > So let's use the value of cpu_logical_map(smp_processor_id()) in place
> > of the MPIDR in the suspend path.  This will result in the same index
> > being used as with the previous code unless the caller has modified
> > cpu_logical_map() beforehand.
> 
> I will rely on your wisdom to rewrite the commit log in a way that
> does not hint at dangerous creativity, if you catch my drift :D

I've augmented it with some of the earlier comments from Dave Martin.  
I think it is fine to be clear in the commit log about what we intend 
this change to be used for.

> > The register allocation in __cpu_suspend is reworked in order to better
> > accommodate the additional argument.
> > 
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> >  arch/arm/kernel/sleep.S   | 25 +++++++++++--------------
> >  arch/arm/kernel/suspend.c |  8 +++++---
> >  2 files changed, 16 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
> > index db1536b8b3..836d10e698 100644
> > --- a/arch/arm/kernel/sleep.S
> > +++ b/arch/arm/kernel/sleep.S
> > @@ -55,6 +55,7 @@
> >   * specific registers and some other data for resume.
> >   *  r0 = suspend function arg0
> >   *  r1 = suspend function
> > + *  r2 = MPIDR value used to index into sleep_save_sp
> 
> CPU's MPIDR or something like that, let's not hint at possible creative usage.

What about "MPIDR of the CPU to be resumed" which should normally just 
be the current CPU's?

After all, creativity is not always a bad thing given it is reviewed 
properly.

> >   */
> >  ENTRY(__cpu_suspend)
> >  	stmfd	sp!, {r4 - r11, lr}
> > @@ -67,23 +68,19 @@ ENTRY(__cpu_suspend)
> >  	mov	r5, sp			@ current virtual SP
> >  	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
> >  	sub	sp, sp, r4		@ allocate CPU state on stack
> > -	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
> > -	add	r0, sp, #8		@ save pointer to save block
> > -	mov	r1, r4			@ size of save block
> > -	mov	r2, r5			@ virtual SP
> >  	ldr	r3, =sleep_save_sp
> > +	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
> >  	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
> > -	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
> > -        ALT_UP_B(1f)
> > -	ldr	r8, =mpidr_hash
> > -	/*
> > -	 * This ldmia relies on the memory layout of the mpidr_hash
> > -	 * struct mpidr_hash.
> > -	 */
> > -	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
> > -	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
> > -	add	r3, r3, lr, lsl #2
> > +	ALT_SMP(ldr r0, =mpidr_hash)
> > +       ALT_UP_B(1f)
> 
> Should be a tab, do not know if that's an email server issue or not.

A mistake on my part.

> > +	/* This ldmia relies on the memory layout of the mpidr_hash struct */
> > +	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
> > +	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
> > +	add	r3, r3, r0, lsl #2
> >  1:
> > +	mov	r2, r5			@ virtual SP
> > +	mov	r1, r4			@ size of save block
> > +	add	r0, sp, #8		@ pointer to save block
> 
> It is already a bit complex to follow, code is ok, but line above was
> better placed closer to sp last change, not after hashing the MPIDR.

Well, I tend to disagree.  I think it is much clearer to set function 
arguments closer to the call site so that r0 doesn't have to be live 
with the stack location throughout this code.

> >  	bl	__cpu_suspend_save
> >  	adr	lr, BSYM(cpu_suspend_abort)
> >  	ldmfd	sp!, {r0, pc}		@ call suspend fn
> > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > index 41cf3cbf75..2835d35234 100644
> > --- a/arch/arm/kernel/suspend.c
> > +++ b/arch/arm/kernel/suspend.c
> > @@ -10,7 +10,7 @@
> >  #include <asm/suspend.h>
> >  #include <asm/tlbflush.h>
> >  
> > -extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
> > +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
> >  extern void cpu_resume_mmu(void);
> >  
> >  #ifdef CONFIG_MMU
> > @@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void);
> >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> >  {
> >  	struct mm_struct *mm = current->active_mm;
> > +	u32 __mpidr = cpu_logical_map(smp_processor_id());
> >  	int ret;
> >  
> >  	if (!idmap_pgd)
> > @@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> >  	 * resume (indicated by a zero return code), we need to switch
> >  	 * back to the correct page tables.
> >  	 */
> > -	ret = __cpu_suspend(arg, fn);
> > +	ret = __cpu_suspend(arg, fn, __mpidr);
> >  	if (ret == 0) {
> >  		cpu_switch_mm(mm->pgd, mm);
> >  		local_flush_bp_all();
> > @@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> >  #else
> >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> >  {
> > -	return __cpu_suspend(arg, fn);
> > +	u32 __mpidr = cpu_logical_map(smp_processor_id());
> > +	return __cpu_suspend(arg, fn, __mpidr);
> >  }
> >  #define	idmap_pgd	NULL
> >  #endif
> 
> Apart from these nits, let's hope it is the last cpu_suspend bit we need to
> change, please land it on -next asap, of course if Russell is ok with that.
> 
> I tested it on TC2.
> 
> FWIW:
> 
> Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

Thanks.


Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array
  2013-07-30  2:08     ` Nicolas Pitre
@ 2013-07-30  9:15       ` Dave Martin
  0 siblings, 0 replies; 43+ messages in thread
From: Dave Martin @ 2013-07-30  9:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 29, 2013 at 10:08:06PM -0400, Nicolas Pitre wrote:
> On Mon, 29 Jul 2013, Lorenzo Pieralisi wrote:
> 
> > On Tue, Jul 23, 2013 at 04:31:17AM +0100, Nicolas Pitre wrote:
> > > Currently we hash the MPIDR of the CPU being suspended to determine which
> > > entry in the sleep_save_sp array to use. In some situations, such as when
> > > we want to resume on another physical CPU, the MPIDR of another CPU should
> > > be used instead.
> > > 
> > > So let's use the value of cpu_logical_map(smp_processor_id()) in place
> > > of the MPIDR in the suspend path.  This will result in the same index
> > > being used as with the previous code unless the caller has modified
> > > cpu_logical_map() beforehand.
> > 
> > I will rely on your wisdom to rewrite the commit log in a way that
> > does not hint at dangerous creativity, if you catch my drift :D
> 
> I've augmented it with some of the earlier comments from Dave Martin.  
> I think it is fine to be clear in the commit log about what we intend 
> this change to be used for.
> 
> > > The register allocation in __cpu_suspend is reworked in order to better
> > > accommodate the additional argument.
> > > 
> > > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > > ---
> > >  arch/arm/kernel/sleep.S   | 25 +++++++++++--------------
> > >  arch/arm/kernel/suspend.c |  8 +++++---
> > >  2 files changed, 16 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
> > > index db1536b8b3..836d10e698 100644
> > > --- a/arch/arm/kernel/sleep.S
> > > +++ b/arch/arm/kernel/sleep.S
> > > @@ -55,6 +55,7 @@
> > >   * specific registers and some other data for resume.
> > >   *  r0 = suspend function arg0
> > >   *  r1 = suspend function
> > > + *  r2 = MPIDR value used to index into sleep_save_sp
> > 
> > CPU's MPIDR or something like that, let's not hint at possible creative usage.
> 
> What about "MPIDR of the CPU to be resumed" which should normally just 
> be the current CPU's?
> 
> After all, creativity is not always a bad thing given it is reviewed 
> properly.

OK, sounds reasonable.

> 
> > >   */
> > >  ENTRY(__cpu_suspend)
> > >  	stmfd	sp!, {r4 - r11, lr}
> > > @@ -67,23 +68,19 @@ ENTRY(__cpu_suspend)
> > >  	mov	r5, sp			@ current virtual SP
> > >  	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
> > >  	sub	sp, sp, r4		@ allocate CPU state on stack
> > > -	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
> > > -	add	r0, sp, #8		@ save pointer to save block
> > > -	mov	r1, r4			@ size of save block
> > > -	mov	r2, r5			@ virtual SP
> > >  	ldr	r3, =sleep_save_sp
> > > +	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
> > >  	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
> > > -	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
> > > -        ALT_UP_B(1f)
> > > -	ldr	r8, =mpidr_hash
> > > -	/*
> > > -	 * This ldmia relies on the memory layout of the mpidr_hash
> > > -	 * struct mpidr_hash.
> > > -	 */
> > > -	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
> > > -	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
> > > -	add	r3, r3, lr, lsl #2
> > > +	ALT_SMP(ldr r0, =mpidr_hash)
> > > +       ALT_UP_B(1f)
> > 
> > Should be a tab, do not know if that's an email server issue or not.
> 
> A mistake on my part.
> 
> > > +	/* This ldmia relies on the memory layout of the mpidr_hash struct */
> > > +	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
> > > +	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
> > > +	add	r3, r3, r0, lsl #2
> > >  1:
> > > +	mov	r2, r5			@ virtual SP
> > > +	mov	r1, r4			@ size of save block
> > > +	add	r0, sp, #8		@ pointer to save block
> > 
> > It is already a bit complex to follow, code is ok, but line above was
> > better placed closer to sp last change, not after hashing the MPIDR.
> 
> Well, I tend to disagree.  I think it is much clearer to set function 
> arguments closer to the call site so that r0 doesn't have to be live 
> with the stack location throughout this code.

Fair point, but I'm happy either way.

With or without, feel free to add

Reviewed-by: Dave Martin <Dave.Martin@arm.com>

Cheers
---Dave

> 
> > >  	bl	__cpu_suspend_save
> > >  	adr	lr, BSYM(cpu_suspend_abort)
> > >  	ldmfd	sp!, {r0, pc}		@ call suspend fn
> > > diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
> > > index 41cf3cbf75..2835d35234 100644
> > > --- a/arch/arm/kernel/suspend.c
> > > +++ b/arch/arm/kernel/suspend.c
> > > @@ -10,7 +10,7 @@
> > >  #include <asm/suspend.h>
> > >  #include <asm/tlbflush.h>
> > >  
> > > -extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
> > > +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
> > >  extern void cpu_resume_mmu(void);
> > >  
> > >  #ifdef CONFIG_MMU
> > > @@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void);
> > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > >  {
> > >  	struct mm_struct *mm = current->active_mm;
> > > +	u32 __mpidr = cpu_logical_map(smp_processor_id());
> > >  	int ret;
> > >  
> > >  	if (!idmap_pgd)
> > > @@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > >  	 * resume (indicated by a zero return code), we need to switch
> > >  	 * back to the correct page tables.
> > >  	 */
> > > -	ret = __cpu_suspend(arg, fn);
> > > +	ret = __cpu_suspend(arg, fn, __mpidr);
> > >  	if (ret == 0) {
> > >  		cpu_switch_mm(mm->pgd, mm);
> > >  		local_flush_bp_all();
> > > @@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > >  #else
> > >  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> > >  {
> > > -	return __cpu_suspend(arg, fn);
> > > +	u32 __mpidr = cpu_logical_map(smp_processor_id());
> > > +	return __cpu_suspend(arg, fn, __mpidr);
> > >  }
> > >  #define	idmap_pgd	NULL
> > >  #endif
> > 
> > Apart from these nits, let's hope it is the last cpu_suspend bit we need to
> > change, please land it on -next asap, of course if Russell is ok with that.
> > 
> > I tested it on TC2.
> > 
> > FWIW:
> > 
> > Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> 
> Thanks.
> 
> 
> Nicolas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs
  2013-07-23  3:31 ` [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs Nicolas Pitre
@ 2013-07-30 16:30   ` Lorenzo Pieralisi
  2013-07-30 19:15     ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-30 16:30 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 23, 2013 at 04:31:28AM +0100, Nicolas Pitre wrote:
> Up to now, the logical CPU was somehow tied to the physical CPU number
> within a cluster.  This causes problems when forcing the boot CPU to be
> different from the first enumerated CPU in the device tree creating a
> discrepancy between logical and physical CPU numbers.
> 
> Let's make the pairing completely independent from physical CPU numbers.
> 
> Let's keep only those logical CPUs with same initial CPU cluster to create
> a uniform scheduler profile without having to modify any of the probed
> topology and compute capacity data.  This has the potential to create
> a non contiguous CPU numbering space when the switcher is active with
> potential impact on buggy user space tools.  It is however better to fix
> those tools rather than making the switcher code more intrusive.

Nico, I am not following you here. Is this not the same problem we
have in a system where we hotplug out some of the CPUs in the logical map ?

Actually you are just hotplugging out some cpus according to a switcher
specific policy, if the problem is there, it is there already, unless
you are referring to MIDRs of the online CPUs that in the switcher case
might change at run-time for a specific logical index, but that happens
regardless.

[...]

>  static void bL_switcher_restore_cpus(void)
> @@ -319,52 +322,86 @@ static void bL_switcher_restore_cpus(void)
>  
>  static int bL_switcher_halve_cpus(void)
>  {
> -	int cpu, cluster, i, ret;
> -	cpumask_t cluster_mask[2], common_mask;
> -
> -	cpumask_clear(&bL_switcher_removed_logical_cpus);
> -	cpumask_clear(&cluster_mask[0]);
> -	cpumask_clear(&cluster_mask[1]);
> +	int i, j, cluster_0, gic_id, ret;
> +	unsigned int cpu, cluster, mask;
> +	cpumask_t available_cpus;
>  
> +	/* First pass to validate what we have */
> +	mask = 0;
>  	for_each_online_cpu(i) {
> -		cpu = cpu_logical_map(i) & 0xff;
> -		cluster = (cpu_logical_map(i) >> 8) & 0xff;
> +		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
> +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
>  		if (cluster >= 2) {
>  			pr_err("%s: only dual cluster systems are supported\n", __func__);
>  			return -EINVAL;
>  		}
> -		cpumask_set_cpu(cpu, &cluster_mask[cluster]);
> +		if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))

pr_warn ?

> +			return -EINVAL;
> +		mask |= (1 << cluster);
>  	}
> -
> -	if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
> -		pr_err("%s: no common set of CPUs\n", __func__);
> +	if (mask != 3) {

Technically, you might have two clusters with e.g. MPIDR[15:8] = {0,2};
this would break this code. Just saying, I know for now we can live with that.
Given that's the only usage of mask, you might just as well use a counter.
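
Roughly along these lines, an untested sketch (not part of the posted
series, variable names made up), keeping the existing cluster >= 2 check
and just counting the distinct clusters seen:

	bool seen[2] = { false, false };
	unsigned int nb_clusters = 0;

	for_each_online_cpu(i) {
		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
		if (cluster >= 2)
			return -EINVAL;
		if (!seen[cluster]) {
			seen[cluster] = true;
			nb_clusters++;
		}
	}
	if (nb_clusters != 2)
		return -EINVAL;

With cluster >= 2 still in place this is equivalent to the mask != 3 test;
lifting the {0,1} restriction would also require dropping that check.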

> +		pr_err("%s: no CPU pairing possible\n", __func__);
>  		return -EINVAL;
>  	}
>  
> -	for_each_online_cpu(i) {
> -		cpu = cpu_logical_map(i) & 0xff;
> -		cluster = (cpu_logical_map(i) >> 8) & 0xff;
> -
> -		if (cpumask_test_cpu(cpu, &common_mask)) {
> -			/* Let's take note of the GIC ID for this CPU */
> -			int gic_id = gic_get_cpu_id(i);
> -			if (gic_id < 0) {
> -				pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
> -				return -EINVAL;
> -			}
> -			bL_gic_id[cpu][cluster] = gic_id;
> -			pr_info("GIC ID for CPU %u cluster %u is %u\n",
> -				cpu, cluster, gic_id);
> -
> +	/*
> +	 * Now let's do the pairing.  We match each CPU with another CPU
> +	 * from a different cluster.  To get a uniform scheduling behavior
> +	 * without fiddling with CPU topology and compute capacity data,
> +	 * we'll use logical CPUs initially belonging to the same cluster.
> +	 */
> +	memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
> +	cpumask_copy(&available_cpus, cpu_online_mask);
> +	cluster_0 = -1;
> +	for_each_cpu(i, &available_cpus) {
> +		int match = -1;
> +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
> +		if (cluster_0 == -1)
> +			cluster_0 = cluster;
> +		if (cluster != cluster_0)
> +			continue;
> +		cpumask_clear_cpu(i, &available_cpus);
> +		for_each_cpu(j, &available_cpus) {
> +			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
>  			/*
> -			 * We keep only those logical CPUs which number
> -			 * is equal to their physical CPU number. This is
> -			 * not perfect but good enough for now.
> +			 * Let's remember the last match to create "odd"
> +			 * pairings on purpose in order for other code not
> +			 * to assume any relation between physical and
> +			 * logical CPU numbers.
>  			 */
> -			if (cpu == i) {
> -				bL_switcher_cpu_original_cluster[cpu] = cluster;
> -				continue;
> -			}
> +			if (cluster != cluster_0)
> +				match = j;
> +		}
> +		if (match != -1) {
> +			bL_switcher_cpu_pairing[i] = match;
> +			cpumask_clear_cpu(match, &available_cpus);
> +			pr_info("CPU%d paired with CPU%d\n", i, match);
> +		}
> +	}
> +
> +	/*
> +	 * Now we disable the unwanted CPUs i.e. everything that has no
> +	 * pairing information (that includes the pairing counterparts).
> +	 */
> +	cpumask_clear(&bL_switcher_removed_logical_cpus);
> +	for_each_online_cpu(i) {
> +		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
> +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
> +
> +		/* Let's take note of the GIC ID for this CPU */
> +		gic_id = gic_get_cpu_id(i);
> +		if (gic_id < 0) {
> +			pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
> +			bL_switcher_restore_cpus();
> +			return -EINVAL;
> +		}
> +		bL_gic_id[cpu][cluster] = gic_id;
> +		pr_info("GIC ID for CPU %u cluster %u is %u\n",
> +			cpu, cluster, gic_id);
> +
> +		if (bL_switcher_cpu_pairing[i] != -1) {
> +			bL_switcher_cpu_original_cluster[i] = cluster;
> +			continue;
>  		}
>  
>  		ret = cpu_down(i);
> @@ -415,7 +452,7 @@ static int bL_switcher_enable(void)
>  
>  static void bL_switcher_disable(void)
>  {
> -	unsigned int cpu, cluster, i;
> +	unsigned int cpu, cluster;
>  	struct bL_thread *t;
>  	struct task_struct *task;
>  
> @@ -435,7 +472,6 @@ static void bL_switcher_disable(void)
>  	 * possibility for interference from external requests.
>  	 */
>  	for_each_online_cpu(cpu) {
> -		BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff));
>  		t = &bL_threads[cpu];
>  		task = t->task;
>  		t->task = NULL;
> @@ -459,14 +495,10 @@ static void bL_switcher_disable(void)
>  		/* If execution gets here, we're in trouble. */
>  		pr_crit("%s: unable to restore original cluster for CPU %d\n",
>  			__func__, cpu);
> -		for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
> -			if ((cpu_logical_map(i) & 0xff) != cpu)
> -				continue;
> -			pr_crit("%s: CPU %d can't be restored\n",
> -				__func__, i);
> -			cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus);
> -			break;
> -		}
> +		pr_crit("%s: CPU %d can't be restored\n",
> +			__func__, bL_switcher_cpu_pairing[cpu]);
> +		cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
> +				  &bL_switcher_removed_logical_cpus);
>  	}
>  
>  	bL_switcher_restore_cpus();
> -- 

Other than that:

Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs
  2013-07-30 16:30   ` Lorenzo Pieralisi
@ 2013-07-30 19:15     ` Nicolas Pitre
  2013-07-31  9:41       ` Lorenzo Pieralisi
  0 siblings, 1 reply; 43+ messages in thread
From: Nicolas Pitre @ 2013-07-30 19:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 30 Jul 2013, Lorenzo Pieralisi wrote:

> On Tue, Jul 23, 2013 at 04:31:28AM +0100, Nicolas Pitre wrote:
> > Up to now, the logical CPU was somehow tied to the physical CPU number
> > within a cluster.  This causes problems when forcing the boot CPU to be
> > different from the first enumerated CPU in the device tree creating a
> > discrepancy between logical and physical CPU numbers.
> > 
> > Let's make the pairing completely independent from physical CPU numbers.
> > 
> > Let's keep only those logical CPUs with same initial CPU cluster to create
> > a uniform scheduler profile without having to modify any of the probed
> > topology and compute capacity data.  This has the potential to create
> > a non contiguous CPU numbering space when the switcher is active with
> > potential impact on buggy user space tools.  It is however better to fix
> > those tools rather than making the switcher code more intrusive.
> 
> Nico, I am not following you here. Is this not the same problem we
> have in a system where we hotplug out some of the CPUs in the logical map ?

Yes, it is.  Although with the switcher, user space does initialize with 
holes in the logical CPU map since some CPUs have been hotplugged out 
before the boot completed, which is something some tools are not 
expecting.  And it did cause problems with Android for example, although 
that has been fixed now.
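
For illustration only, a made-up user-space sketch (not code from Android
or any actual tool): anything that blindly iterates cpu0..cpuN-1 under
/sys trips over such holes, whereas parsing the online mask copes with
them:

	#include <stdio.h>

	int main(void)
	{
		char buf[256];
		FILE *f = fopen("/sys/devices/system/cpu/online", "r");

		/* e.g. "0-3" normally, possibly "0,2" with the switcher active */
		if (f && fgets(buf, sizeof(buf), f))
			printf("online CPUs: %s", buf);
		if (f)
			fclose(f);
		return 0;
	}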

> [...]
> 
> >  static void bL_switcher_restore_cpus(void)
> > @@ -319,52 +322,86 @@ static void bL_switcher_restore_cpus(void)
> >  
> >  static int bL_switcher_halve_cpus(void)
> >  {
> > -	int cpu, cluster, i, ret;
> > -	cpumask_t cluster_mask[2], common_mask;
> > -
> > -	cpumask_clear(&bL_switcher_removed_logical_cpus);
> > -	cpumask_clear(&cluster_mask[0]);
> > -	cpumask_clear(&cluster_mask[1]);
> > +	int i, j, cluster_0, gic_id, ret;
> > +	unsigned int cpu, cluster, mask;
> > +	cpumask_t available_cpus;
> >  
> > +	/* First pass to validate what we have */
> > +	mask = 0;
> >  	for_each_online_cpu(i) {
> > -		cpu = cpu_logical_map(i) & 0xff;
> > -		cluster = (cpu_logical_map(i) >> 8) & 0xff;
> > +		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
> > +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
> >  		if (cluster >= 2) {
> >  			pr_err("%s: only dual cluster systems are supported\n", __func__);
> >  			return -EINVAL;
> >  		}
> > -		cpumask_set_cpu(cpu, &cluster_mask[cluster]);
> > +		if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
> 
> pr_warn ?

I dunno.  What's the policy for choosing between those two?
Obviously this is a "shouldn't happen" case.
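
As far as I can tell the two forms bail out the same way and differ only
in what gets logged; a sketch (message text made up):

	if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))	/* warning + backtrace */
		return -EINVAL;

	if (cpu >= MAX_CPUS_PER_CLUSTER) {		/* plain log line, no backtrace */
		pr_warn("%s: CPU%u out of range\n", __func__, cpu);
		return -EINVAL;
	}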

> 
> > +			return -EINVAL;
> > +		mask |= (1 << cluster);
> >  	}
> > -
> > -	if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
> > -		pr_err("%s: no common set of CPUs\n", __func__);
> > +	if (mask != 3) {
> 
> Technically, you might have two clusters with e.g. MPIDR[15:8] = {0,2};
> this would break this code. Just saying, I know for now we can live with that.
> Given that's the only usage of mask, you might just as well use a counter.

Well, there are assumptions about the cluster encoding all over the 
place, so this is a stop gap to trap anything which is not following the 
0,1 enumeration.

This assumption could be fixed eventually if it turns out to be false on some 
hardware.

> > +		pr_err("%s: no CPU pairing possible\n", __func__);
> >  		return -EINVAL;
> >  	}
> >  
> > -	for_each_online_cpu(i) {
> > -		cpu = cpu_logical_map(i) & 0xff;
> > -		cluster = (cpu_logical_map(i) >> 8) & 0xff;
> > -
> > -		if (cpumask_test_cpu(cpu, &common_mask)) {
> > -			/* Let's take note of the GIC ID for this CPU */
> > -			int gic_id = gic_get_cpu_id(i);
> > -			if (gic_id < 0) {
> > -				pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
> > -				return -EINVAL;
> > -			}
> > -			bL_gic_id[cpu][cluster] = gic_id;
> > -			pr_info("GIC ID for CPU %u cluster %u is %u\n",
> > -				cpu, cluster, gic_id);
> > -
> > +	/*
> > +	 * Now let's do the pairing.  We match each CPU with another CPU
> > +	 * from a different cluster.  To get a uniform scheduling behavior
> > +	 * without fiddling with CPU topology and compute capacity data,
> > +	 * we'll use logical CPUs initially belonging to the same cluster.
> > +	 */
> > +	memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
> > +	cpumask_copy(&available_cpus, cpu_online_mask);
> > +	cluster_0 = -1;
> > +	for_each_cpu(i, &available_cpus) {
> > +		int match = -1;
> > +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
> > +		if (cluster_0 == -1)
> > +			cluster_0 = cluster;
> > +		if (cluster != cluster_0)
> > +			continue;
> > +		cpumask_clear_cpu(i, &available_cpus);
> > +		for_each_cpu(j, &available_cpus) {
> > +			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
> >  			/*
> > -			 * We keep only those logical CPUs which number
> > -			 * is equal to their physical CPU number. This is
> > -			 * not perfect but good enough for now.
> > +			 * Let's remember the last match to create "odd"
> > +			 * pairings on purpose in order for other code not
> > +			 * to assume any relation between physical and
> > +			 * logical CPU numbers.
> >  			 */
> > -			if (cpu == i) {
> > -				bL_switcher_cpu_original_cluster[cpu] = cluster;
> > -				continue;
> > -			}
> > +			if (cluster != cluster_0)
> > +				match = j;
> > +		}
> > +		if (match != -1) {
> > +			bL_switcher_cpu_pairing[i] = match;
> > +			cpumask_clear_cpu(match, &available_cpus);
> > +			pr_info("CPU%d paired with CPU%d\n", i, match);
> > +		}
> > +	}
> > +
> > +	/*
> > +	 * Now we disable the unwanted CPUs i.e. everything that has no
> > +	 * pairing information (that includes the pairing counterparts).
> > +	 */
> > +	cpumask_clear(&bL_switcher_removed_logical_cpus);
> > +	for_each_online_cpu(i) {
> > +		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
> > +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
> > +
> > +		/* Let's take note of the GIC ID for this CPU */
> > +		gic_id = gic_get_cpu_id(i);
> > +		if (gic_id < 0) {
> > +			pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
> > +			bL_switcher_restore_cpus();
> > +			return -EINVAL;
> > +		}
> > +		bL_gic_id[cpu][cluster] = gic_id;
> > +		pr_info("GIC ID for CPU %u cluster %u is %u\n",
> > +			cpu, cluster, gic_id);
> > +
> > +		if (bL_switcher_cpu_pairing[i] != -1) {
> > +			bL_switcher_cpu_original_cluster[i] = cluster;
> > +			continue;
> >  		}
> >  
> >  		ret = cpu_down(i);
> > @@ -415,7 +452,7 @@ static int bL_switcher_enable(void)
> >  
> >  static void bL_switcher_disable(void)
> >  {
> > -	unsigned int cpu, cluster, i;
> > +	unsigned int cpu, cluster;
> >  	struct bL_thread *t;
> >  	struct task_struct *task;
> >  
> > @@ -435,7 +472,6 @@ static void bL_switcher_disable(void)
> >  	 * possibility for interference from external requests.
> >  	 */
> >  	for_each_online_cpu(cpu) {
> > -		BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff));
> >  		t = &bL_threads[cpu];
> >  		task = t->task;
> >  		t->task = NULL;
> > @@ -459,14 +495,10 @@ static void bL_switcher_disable(void)
> >  		/* If execution gets here, we're in trouble. */
> >  		pr_crit("%s: unable to restore original cluster for CPU %d\n",
> >  			__func__, cpu);
> > -		for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
> > -			if ((cpu_logical_map(i) & 0xff) != cpu)
> > -				continue;
> > -			pr_crit("%s: CPU %d can't be restored\n",
> > -				__func__, i);
> > -			cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus);
> > -			break;
> > -		}
> > +		pr_crit("%s: CPU %d can't be restored\n",
> > +			__func__, bL_switcher_cpu_pairing[cpu]);
> > +		cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
> > +				  &bL_switcher_removed_logical_cpus);
> >  	}
> >  
> >  	bL_switcher_restore_cpus();
> > -- 
> 
> Other than that:
> 
> Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

Thanks.


Nicolas


> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs
  2013-07-30 19:15     ` Nicolas Pitre
@ 2013-07-31  9:41       ` Lorenzo Pieralisi
  0 siblings, 0 replies; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-31  9:41 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 30, 2013 at 08:15:02PM +0100, Nicolas Pitre wrote:
> On Tue, 30 Jul 2013, Lorenzo Pieralisi wrote:
> 
> > On Tue, Jul 23, 2013 at 04:31:28AM +0100, Nicolas Pitre wrote:
> > > Up to now, the logical CPU was somehow tied to the physical CPU number
> > > within a cluster.  This causes problems when forcing the boot CPU to be
> > > different from the first enumerated CPU in the device tree creating a
> > > discrepancy between logical and physical CPU numbers.
> > > 
> > > Let's make the pairing completely independent from physical CPU numbers.
> > > 
> > > Let's keep only those logical CPUs with same initial CPU cluster to create
> > > a uniform scheduler profile without having to modify any of the probed
> > > topology and compute capacity data.  This has the potential to create
> > > a non contiguous CPU numbering space when the switcher is active with
> > > potential impact on buggy user space tools.  It is however better to fix
> > > those tools rather than making the switcher code more intrusive.
> > 
> > Nico, I am not following you here. Is this not the same problem we
> > have in a system where we hotplug out some of the CPUs in the logical map ?
> 
> Yes, it is.  Although with the switcher, user space does initialize with 
> holes in the logical CPU map since some CPUs have been hotplugged out 
> before the boot completed, which is something some tools are not 
> expecting.  And it did cause problems with Android for example, although 
> that has been fixed now.

Well, fixing them is a good thing to do, as you mentioned, so it is not
your problem.

> > [...]
> > 
> > >  static void bL_switcher_restore_cpus(void)
> > > @@ -319,52 +322,86 @@ static void bL_switcher_restore_cpus(void)
> > >  
> > >  static int bL_switcher_halve_cpus(void)
> > >  {
> > > -	int cpu, cluster, i, ret;
> > > -	cpumask_t cluster_mask[2], common_mask;
> > > -
> > > -	cpumask_clear(&bL_switcher_removed_logical_cpus);
> > > -	cpumask_clear(&cluster_mask[0]);
> > > -	cpumask_clear(&cluster_mask[1]);
> > > +	int i, j, cluster_0, gic_id, ret;
> > > +	unsigned int cpu, cluster, mask;
> > > +	cpumask_t available_cpus;
> > >  
> > > +	/* First pass to validate what we have */
> > > +	mask = 0;
> > >  	for_each_online_cpu(i) {
> > > -		cpu = cpu_logical_map(i) & 0xff;
> > > -		cluster = (cpu_logical_map(i) >> 8) & 0xff;
> > > +		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
> > > +		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
> > >  		if (cluster >= 2) {
> > >  			pr_err("%s: only dual cluster systems are supported\n", __func__);
> > >  			return -EINVAL;
> > >  		}
> > > -		cpumask_set_cpu(cpu, &cluster_mask[cluster]);
> > > +		if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
> > 
> > pr_warn ?
> 
> I dunno.  What's the policy for choosing between those two?
> Obviously this is a "shouldn't happen" case.

For sure we have to bail out of there if that's hit. Whether you need a
stack dump or not, I do not know; probably we don't, but that's arguable.

> > > +			return -EINVAL;
> > > +		mask |= (1 << cluster);
> > >  	}
> > > -
> > > -	if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
> > > -		pr_err("%s: no common set of CPUs\n", __func__);
> > > +	if (mask != 3) {
> > 
> > Technically, you might have two clusters with e.g. MPIDR[15:8] = {0,2};
> > this would break this code. Just saying, I know for now we can live with that.
> > Given that's the only usage of mask, you might just as well use a counter.
> 
> Well, there are assumptions about the cluster encoding all over the 
> place, so this is a stop gap to trap anything which is not following the 
> 0,1 enumeration.
> 
> This assumption could be fixed eventually if it turns out to be false on some 
> hardware.

Yes, you are absolutely right; for now, as I mentioned, let's live with this
restriction.

Lorenzo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active
  2013-07-23  3:31 ` [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active Nicolas Pitre
@ 2013-07-31 10:30   ` Lorenzo Pieralisi
  2013-08-05  4:25     ` Nicolas Pitre
  0 siblings, 1 reply; 43+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-31 10:30 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 23, 2013 at 04:31:27AM +0100, Nicolas Pitre wrote:
> Trying to support both the switcher and CPU hotplug at the same time
> is quickly becoming very complex due to ambiguous semantics.  So let's
> simply veto any hotplug requests when the switcher is active for now.
> 
> This restriction might be loosened eventually.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/common/bL_switcher.c | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
> 
> diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> index 704c4b4ef3..2fe3911601 100644
> --- a/arch/arm/common/bL_switcher.c
> +++ b/arch/arm/common/bL_switcher.c
> @@ -528,6 +528,25 @@ static int __init bL_switcher_sysfs_init(void)
>  
>  #endif  /* CONFIG_SYSFS */
>  
> +/*
> + * Veto any CPU hotplug operation while the switcher is active.
> + * We're just not ready to deal with that given the trickery involved.
> + */
> +static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
> +					unsigned long action, void *hcpu)
> +{
> +	switch (action) {
> +	case CPU_UP_PREPARE:
> +	case CPU_DOWN_PREPARE:
> +		if (bL_switcher_active)
> +			return NOTIFY_BAD;

This is a bit of a sledgehammer. It implies that S2R can't be implemented when
the switcher is active. Are we really sure that hotplug, given MCPM HW
CPUs awareness, is incompatible with the switcher as it stands ?

What are the main issues that have to be tackled ?

Thanks,
Lorenzo

> +	}
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block bL_switcher_hotplug_notifier =
> +	{ &bL_switcher_hotplug_callback, NULL, 0 };
> +
>  static bool no_bL_switcher;
>  core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
>  
> @@ -540,6 +559,8 @@ static int __init bL_switcher_init(void)
>  		return -EINVAL;
>  	}
>  
> +	register_cpu_notifier(&bL_switcher_hotplug_notifier);
> +
>  	if (!no_bL_switcher) {
>  		ret = bL_switcher_enable();
>  		if (ret)
> -- 
> 1.8.1.2
> 
> 
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active
  2013-07-31 10:30   ` Lorenzo Pieralisi
@ 2013-08-05  4:25     ` Nicolas Pitre
  0 siblings, 0 replies; 43+ messages in thread
From: Nicolas Pitre @ 2013-08-05  4:25 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 31 Jul 2013, Lorenzo Pieralisi wrote:

> On Tue, Jul 23, 2013 at 04:31:27AM +0100, Nicolas Pitre wrote:
> > Trying to support both the switcher and CPU hotplug at the same time
> > is quickly becoming very complex due to ambiguous semantics.  So let's
> > simply veto any hotplug requests when the switcher is active for now.
> > 
> > This restriction might be loosened eventually.
> > 
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> >  arch/arm/common/bL_switcher.c | 21 +++++++++++++++++++++
> >  1 file changed, 21 insertions(+)
> > 
> > diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
> > index 704c4b4ef3..2fe3911601 100644
> > --- a/arch/arm/common/bL_switcher.c
> > +++ b/arch/arm/common/bL_switcher.c
> > @@ -528,6 +528,25 @@ static int __init bL_switcher_sysfs_init(void)
> >  
> >  #endif  /* CONFIG_SYSFS */
> >  
> > +/*
> > + * Veto any CPU hotplug operation while the switcher is active.
> > + * We're just not ready to deal with that given the trickery involved.
> > + */
> > +static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
> > +					unsigned long action, void *hcpu)
> > +{
> > +	switch (action) {
> > +	case CPU_UP_PREPARE:
> > +	case CPU_DOWN_PREPARE:
> > +		if (bL_switcher_active)
> > +			return NOTIFY_BAD;
> 
> This is a bit of a sledgehammer. It implies that S2R can't be implemented when
> the switcher is active. Are we really sure that hotplug, given MCPM HW
> CPUs awareness, is incompatible with the switcher as it stands ?
> 
> What are the main issues that have to be tackled ?

It's about semantics.  Let's say the switcher pairs CPU0 with CPU2 and 
CPU1 with CPU3, effectively keeping only logical CPUs 0 and 1 active.

Obviously, we must disallow hotplugging CPUs 2 and 3.

But what if CPU0 is removed, and then the switcher is disabled?  Should 
we bring up CPU2 or not?  What if right before the disabling of the 
switcher, logical CPU0 was switched to CPU2?  Etc.

So I decided that there is no right answer, and maybe we shouldn't care 
that much about those semantic issues around the runtime disabling of 
the switcher.

So I've replaced this patch with the following one:

----- >8
[PATCH] ARM: bL_switcher: filter CPU hotplug requests when the switcher is active

Trying to support both the switcher and CPU hotplug at the same time
is tricky due to ambiguous semantics.  So let's at least prevent users
from messing around with those logical CPUs the switcher has removed
and those which were not active when the switcher was activated.

Signed-off-by: Nicolas Pitre <nico@linaro.org>

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 0e4fb3e5e9..335ff76d4c 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -554,6 +554,26 @@ static int __init bL_switcher_sysfs_init(void)
 
 #endif  /* CONFIG_SYSFS */
 
+/*
+ * Veto any CPU hotplug operation on those CPUs we've removed
+ * while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	if (bL_switcher_active) {
+		int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu];
+		switch (action & 0xf) {
+		case CPU_UP_PREPARE:
+		case CPU_DOWN_PREPARE:
+			if (pairing == -1)
+				return NOTIFY_BAD;
+		}
+	}
+	return NOTIFY_DONE;
+}
+
 static bool no_bL_switcher;
 core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
 
@@ -566,6 +586,8 @@ static int __init bL_switcher_init(void)
 		return -EINVAL;
 	}
 
+	cpu_notifier(bL_switcher_hotplug_callback, 0);
+
 	if (!no_bL_switcher) {
 		ret = bL_switcher_enable();
 		if (ret)

^ permalink raw reply related	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2013-08-05  4:25 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-23  3:31 [PATCH 00/13] The big.LITTLE In-Kernel Switcher (IKS), part 1 Nicolas Pitre
2013-07-23  3:31 ` [PATCH 01/13] ARM: suspend: use hash of cpu_logical_map value to index into save array Nicolas Pitre
2013-07-24 16:47   ` Dave Martin
2013-07-24 18:55     ` Nicolas Pitre
2013-07-25 10:55       ` Dave Martin
2013-07-25 16:06         ` Nicolas Pitre
2013-07-26 11:31           ` Lorenzo Pieralisi
2013-07-26 14:41             ` Nicolas Pitre
2013-07-26 15:34               ` Dave Martin
2013-07-26 15:43                 ` Nicolas Pitre
2013-07-26 15:45                   ` Dave Martin
2013-07-26 17:04                   ` Lorenzo Pieralisi
2013-07-26 19:19                     ` Nicolas Pitre
2013-07-29 11:50   ` Lorenzo Pieralisi
2013-07-30  2:08     ` Nicolas Pitre
2013-07-30  9:15       ` Dave Martin
2013-07-23  3:31 ` [PATCH 02/13] ARM: gic: add CPU migration support Nicolas Pitre
2013-07-25 11:44   ` Jonathan Austin
2013-07-25 19:11     ` Nicolas Pitre
2013-07-23  3:31 ` [PATCH 03/13] ARM: b.L: core switcher code Nicolas Pitre
2013-07-25 17:15   ` Jonathan Austin
2013-07-25 20:20     ` Nicolas Pitre
2013-07-26 10:45   ` Lorenzo Pieralisi
2013-07-26 14:29     ` Nicolas Pitre
2013-07-26 14:53   ` Russell King - ARM Linux
2013-07-26 15:10     ` Nicolas Pitre
2013-07-23  3:31 ` [PATCH 04/13] ARM: bL_switcher: add clockevent save/restore support Nicolas Pitre
2013-07-23  3:31 ` [PATCH 05/13] ARM: bL_switcher: move to dedicated threads rather than workqueues Nicolas Pitre
2013-07-26 15:18   ` Lorenzo Pieralisi
2013-07-26 15:39     ` Nicolas Pitre
2013-07-23  3:31 ` [PATCH 06/13] ARM: bL_switcher: simplify stack isolation Nicolas Pitre
2013-07-23  3:31 ` [PATCH 07/13] ARM: bL_switcher: hot-unplug half of the available CPUs Nicolas Pitre
2013-07-23  3:31 ` [PATCH 08/13] ARM: bL_switcher: do not hardcode GIC IDs in the code Nicolas Pitre
2013-07-23  3:31 ` [PATCH 09/13] ARM: bL_switcher: ability to enable and disable the switcher via sysfs Nicolas Pitre
2013-07-23  3:31 ` [PATCH 10/13] ARM: bL_switcher: add kernel cmdline param to disable the switcher on boot Nicolas Pitre
2013-07-23  3:31 ` [PATCH 11/13] ARM: bL_switcher: veto CPU hotplug requests when the switcher is active Nicolas Pitre
2013-07-31 10:30   ` Lorenzo Pieralisi
2013-08-05  4:25     ` Nicolas Pitre
2013-07-23  3:31 ` [PATCH 12/13] ARM: bL_switcher: remove assumptions between logical and physical CPUs Nicolas Pitre
2013-07-30 16:30   ` Lorenzo Pieralisi
2013-07-30 19:15     ` Nicolas Pitre
2013-07-31  9:41       ` Lorenzo Pieralisi
2013-07-23  3:31 ` [PATCH 13/13] ARM: bL_switcher: add a simple /dev user interface for debugging purposes Nicolas Pitre
