* [PATCH 01/13] arm64: Add ESR_EL2_EC macros to hyp-stub
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

To improve the readability of the el1_sync routine in hyp-stub.S, replace the
numeric immediate values with the preprocessor macros ESR_EL2_EC_SHIFT and
ESR_EL2_EC_HVC64.
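
For reference, the check these macros express can be written in C as follows
(a minimal sketch; the helper name is illustrative and not part of this patch):

  #include <stdbool.h>

  #define ESR_EL2_EC_SHIFT	26	/* exception class: ESR_EL2[31:26] */
  #define ESR_EL2_EC_HVC64	0x16	/* EC value for an HVC trap from AArch64 */

  /* Does this ESR_EL2 value describe an HVC trap from AArch64? */
  static inline bool is_hvc64_trap(unsigned long esr)
  {
          return (esr >> ESR_EL2_EC_SHIFT) == ESR_EL2_EC_HVC64;
  }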

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/hyp-stub.S | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..2d960a9 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -52,10 +52,13 @@ ENDPROC(__hyp_stub_vectors)
 
 	.align 11
 
+#define ESR_EL2_EC_SHIFT	26
+#define ESR_EL2_EC_HVC64	0x16
+
 el1_sync:
 	mrs	x1, esr_el2
-	lsr	x1, x1, #26
-	cmp	x1, #0x16
+	lsr	x1, x1, #ESR_EL2_EC_SHIFT
+	cmp	x1, #ESR_EL2_EC_HVC64
 	b.ne	2f				// Not an HVC trap
 	cbz	x0, 1f
 	msr	vbar_el2, x0			// Set vbar_el2
-- 
1.9.1

* [PATCH 03/13] arm64: Convert hcalls to use ISS field
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

To allow additional hcalls to be defined, and to make the arm64 hcall API more
consistent across exception vector routines, change the hcall implementations
to use the ISS field of the ESR_EL2 register to specify the hcall type.

The existing arm64 hcall implementations are limited in that they only allow
for two distinct hcalls: one with the x0 register zero and one with it
non-zero.  Also, the APIs of the hyp-stub and KVM exception vector routines
differ: hyp-stub uses a non-zero value in x0 to implement __hyp_set_vectors,
whereas KVM uses it to implement kvm_call_hyp.

Define three new preprocessor macros, HVC_GET_VECTORS, HVC_SET_VECTORS and
HVC_KVM_CALL_HYP, to be used as hcall type specifiers, and convert the
existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
to use these macros when executing an HVC call.  Also change the
corresponding hyp-stub and KVM el1_sync exception vector routines to use these
new macros.
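
Expressed in C, the dispatch the el1_sync routines below implement is roughly
the following (an illustrative sketch; only the ISS mask and the hcall
numbers come from this series):

  #define ESR_EL2_ISS		0xffff
  #define HVC_GET_VECTORS	1
  #define HVC_SET_VECTORS	2
  #define HVC_KVM_CALL_HYP	3

  static void el1_sync_dispatch(unsigned long esr)
  {
          switch (esr & ESR_EL2_ISS) {	/* hcall type from the ISS field */
          case HVC_GET_VECTORS:
                  /* return vbar_el2 in x0 */
                  break;
          case HVC_SET_VECTORS:
                  /* vbar_el2 = x0 */
                  break;
          case HVC_KVM_CALL_HYP:
                  /* branch to the hyp function whose address is in x0 */
                  break;
          }
  }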

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 20 ++++++++++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 38 ++++++++++++++++++++++++++------------
 arch/arm64/kvm/hyp.S          | 19 ++++++++++++-------
 3 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df52..894fe53 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -21,6 +21,26 @@
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+/*
+ * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
+ */
+
+#define HVC_GET_VECTORS 1
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x0: Physical address of the new vector table.
+ */
+
+#define HVC_SET_VECTORS 2
+
+/*
+ * HVC_KVM_CALL_HYP - Execute kvm_call_hyp routine.
+ */
+
+#define HVC_KVM_CALL_HYP 3
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 2d960a9..9ab5f70 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -54,16 +54,29 @@ ENDPROC(__hyp_stub_vectors)
 
 #define ESR_EL2_EC_SHIFT	26
 #define ESR_EL2_EC_HVC64	0x16
+#define ESR_EL2_ISS		0xffff
 
 el1_sync:
-	mrs	x1, esr_el2
-	lsr	x1, x1, #ESR_EL2_EC_SHIFT
-	cmp	x1, #ESR_EL2_EC_HVC64
-	b.ne	2f				// Not an HVC trap
-	cbz	x0, 1f
-	msr	vbar_el2, x0			// Set vbar_el2
+	mrs	x10, esr_el2
+	lsr	x9, x10, #ESR_EL2_EC_SHIFT	// x9=EC
+	and	x10, x10, #ESR_EL2_ISS		// x10=ISS
+
+	cmp     x9, #ESR_EL2_EC_HVC64
+	b.ne    2f                              // Not a host HVC trap
+
+	mrs     x9, vttbr_el2
+	cbnz    x9, 2f                          // Not a host HVC trap
+
+	cmp	x10, #HVC_GET_VECTORS
+	b.ne	1f
+	mrs	x0, vbar_el2
 	b	2f
-1:	mrs	x0, vbar_el2			// Return vbar_el2
+
+1:	cmp	x10, #HVC_SET_VECTORS
+	b.ne	1f
+	msr	vbar_el2, x0
+
+1:
 2:	eret
 ENDPROC(el1_sync)
 
@@ -103,11 +116,12 @@ ENDPROC(\label)
  * initialisation entry point.
  */
 
-ENTRY(__hyp_get_vectors)
-	mov	x0, xzr
-	// fall through
 ENTRY(__hyp_set_vectors)
-	hvc	#0
+	hvc	#HVC_SET_VECTORS
 	ret
-ENDPROC(__hyp_get_vectors)
 ENDPROC(__hyp_set_vectors)
+
+ENTRY(__hyp_get_vectors)
+	hvc	#HVC_GET_VECTORS
+	ret
+ENDPROC(__hyp_get_vectors)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b72aa9f..3972ee9 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -26,6 +26,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/virt.h>
 
 #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
@@ -1105,12 +1106,9 @@ __hyp_panic_str:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
- * A function pointer with a value of 0 has a special meaning, and is
- * used to implement __hyp_get_vectors in the same way as in
- * arch/arm64/kernel/hyp_stub.S.
  */
 ENTRY(kvm_call_hyp)
-	hvc	#0
+	hvc	#HVC_KVM_CALL_HYP
 	ret
 ENDPROC(kvm_call_hyp)
 
@@ -1140,6 +1138,7 @@ el1_sync:					// Guest trapped into EL2
 	push	x2, x3
 
 	mrs	x1, esr_el2
+	and	x0, x1, #ESR_EL2_ISS
 	lsr	x2, x1, #ESR_EL2_EC_SHIFT
 
 	cmp	x2, #ESR_EL2_EC_HVC64
@@ -1149,15 +1148,19 @@ el1_sync:					// Guest trapped into EL2
 	cbnz	x3, el1_trap			// called HVC
 
 	/* Here, we're pretty sure the host called HVC. */
+	mov	x10, x0
 	pop	x2, x3
 	pop	x0, x1
 
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
+	cmp	x10, #HVC_GET_VECTORS
+	b.ne	1f
 	mrs	x0, vbar_el2
 	b	2f
 
-1:	push	lr, xzr
+1:	cmp	x10, #HVC_KVM_CALL_HYP
+	b.ne	1f
+
+	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
@@ -1170,6 +1173,8 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
+
+1:
 2:	eret
 
 el1_trap:
-- 
1.9.1

* [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

Add the new hcall HVC_CALL_FUNC, which allows execution of a function at EL2.
During CPU reset the CPU must be brought back to the exception level it had on
entry to the kernel; the HVC_CALL_FUNC hcall provides the mechanism needed for
this exception level switch.
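
A caller-side wrapper might look as follows (a hypothetical C sketch, not
part of this patch; the real caller added later in this series is the
cpu_reset() assembly routine):

  /* Execute fn(arg0, arg1, arg2) at EL2; fn is a physical address. */
  static void hvc_call_func(unsigned long fn, unsigned long arg0,
                            unsigned long arg1, unsigned long arg2)
  {
          register unsigned long x0 asm("x0") = fn;
          register unsigned long x1 asm("x1") = arg0;
          register unsigned long x2 asm("x2") = arg1;
          register unsigned long x3 asm("x3") = arg2;

          asm volatile("hvc #4"	/* #4 == HVC_CALL_FUNC */
                       : "+r" (x0)
                       : "r" (x1), "r" (x2), "r" (x3)
                       : "memory");
  }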

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 11 +++++++++++
 arch/arm64/kernel/hyp-stub.S  | 10 ++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 894fe53..b217fbc 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -41,6 +41,17 @@
 
 #define HVC_KVM_CALL_HYP 3
 
+/*
+ * HVC_CALL_FUNC - Execute a function at EL2.
+ *
+ * @x0: Physical address of the function to be executed.
+ * @x1: Passed as the first argument to @fn.
+ * @x2: Passed as the second argument to @fn.
+ * @x3: Passed as the third argument to @fn.
+ */
+
+#define HVC_CALL_FUNC 4
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 9ab5f70..a21cf51 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -75,7 +75,17 @@ el1_sync:
 1:	cmp	x10, #HVC_SET_VECTORS
 	b.ne	1f
 	msr	vbar_el2, x0
+	b	2f
 
+1:	cmp	x10, #HVC_CALL_FUNC
+	b.ne    1f
+	mov	x29, lr
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+	mov	lr, x29
 1:
 2:	eret
 ENDPROC(el1_sync)
-- 
1.9.1

* [PATCH 05/13] arm64: Add EL2 switch to soft_restart
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

When a CPU is reset it needs to be put into the exception level it had when it
entered the kernel.  Update cpu_reset() to accept an argument, el2_switch,
which signals cpu_reset() to enter the soft reset address at EL2.  If
el2_switch is not set, the soft reset address is entered at EL1.

Update cpu_soft_restart() and soft_restart() to pass the return value of
is_hyp_mode_available() as the el2_switch value to cpu_reset().  Also update
the comments of cpu_reset(), cpu_soft_restart() and soft_restart() to reflect
this change.
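
The updated calling convention is then (as reflected in the hunks below):

  void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long el2_switch,
                        unsigned long addr);

  /* e.g. from soft_restart(): enter addr at EL2 when EL2 is available. */
  cpu_soft_restart(virt_to_phys(cpu_reset), is_hyp_mode_available(), addr);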

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/proc-fns.h |  4 ++--
 arch/arm64/kernel/process.c       |  6 ++++-
 arch/arm64/mm/proc.S              | 47 +++++++++++++++++++++++++++++----------
 3 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 9a8fd84..339394d 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -32,8 +32,8 @@ extern void cpu_cache_off(void);
 extern void cpu_do_idle(void);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
-void cpu_soft_restart(phys_addr_t cpu_reset,
-		unsigned long addr) __attribute__((noreturn));
+void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long el2_switch,
+		      unsigned long addr) __attribute__((noreturn));
 extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
 extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index bf66922..0a3414b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -50,6 +50,7 @@
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
+#include <asm/virt.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -60,7 +61,10 @@ EXPORT_SYMBOL(__stack_chk_guard);
 void soft_restart(unsigned long addr)
 {
 	setup_mm_for_reboot();
-	cpu_soft_restart(virt_to_phys(cpu_reset), addr);
+
+	cpu_soft_restart(virt_to_phys(cpu_reset), is_hyp_mode_available(),
+			 addr);
+
 	/* Should never get here */
 	BUG();
 }
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 4e778b1..7467199 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/virt.h>
 
 #include "proc-macros.S"
 
@@ -59,27 +60,48 @@ ENTRY(cpu_cache_off)
 ENDPROC(cpu_cache_off)
 
 /*
- *	cpu_reset(loc)
+ * cpu_reset(el2_switch, loc) - Helper for cpu_soft_restart.
  *
- *	Perform a soft reset of the system.  Put the CPU into the same state
- *	as it would be if it had been reset, and branch to what would be the
- *	reset vector. It must be executed with the flat identity mapping.
+ * @cpu_reset: Physical address of the cpu_reset routine.
+ * @el2_switch: Flag to indicate a switch to EL2 is needed.
+ * @addr: Location to jump to for soft reset.
  *
- *	- loc   - location to jump to for soft reset
+ * Put the CPU into the same state as it would be if it had been reset, and
+ * branch to what would be the reset vector. It must be executed with the
+ * flat identity mapping.
  */
+
 	.align	5
+
 ENTRY(cpu_reset)
-	mrs	x1, sctlr_el1
-	bic	x1, x1, #1
-	msr	sctlr_el1, x1			// disable the MMU
+	mrs	x2, sctlr_el1
+	bic	x2, x2, #1
+	msr	sctlr_el1, x2			// disable the MMU
 	isb
-	ret	x0
+
+	cbz	x0, 1f				// el2_switch?
+	mov	x0, x1
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	hvc	#HVC_CALL_FUNC			// no return
+
+1:	ret	x1
 ENDPROC(cpu_reset)
 
+/*
+ * cpu_soft_restart(cpu_reset, el2_switch, addr) - Perform a cpu soft reset.
+ *
+ * @cpu_reset: Physical address of the cpu_reset routine.
+ * @el2_switch: Flag to indicate a switch to EL2 is needed, passed to cpu_reset.
+ * @addr: Location to jump to for soft reset, passed to cpu_reset.
+ *
+ */
+
 ENTRY(cpu_soft_restart)
-	/* Save address of cpu_reset() and reset address */
-	mov	x19, x0
-	mov	x20, x1
+	mov	x19, x0				// cpu_reset
+	mov	x20, x1				// el2_switch
+	mov	x21, x2				// addr
 
 	/* Turn D-cache off */
 	bl	cpu_cache_off
@@ -88,6 +110,7 @@ ENTRY(cpu_soft_restart)
 	bl	flush_cache_all
 
 	mov	x0, x20
+	mov	x1, x21
 	ret	x19
 ENDPROC(cpu_soft_restart)
 
-- 
1.9.1

* [PATCH 06/13] arm64: Add new routine read_cpu_properties
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

The kexec re-boot support to be added in a subsequent patch in this series
will need to read the device tree CPU properties, and it is expected that a
rework of the SMP spin table code to handle cpu_die will also need this
functionality.  Add two new common arm64 files, cpu-properties.h and
cpu-properties.c, that define a new structure, cpu_properties, which holds the
various CPU properties from a device tree, and a new routine,
read_cpu_properties(), that fills the structure from a device tree CPU node.
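
A caller would use the new routine roughly as follows (an illustrative
sketch, not code from this patch; kernel context assumed):

  struct cpu_properties cp;
  struct device_node *dn = NULL;

  /* Walk all device tree cpu nodes and read their properties. */
  while ((dn = of_find_node_by_type(dn, "cpu"))) {
          if (read_cpu_properties(&cp, dn))
                  continue;	/* bad or unsupported cpu node */
          pr_info("cpu hwid-%llx: enable-method '%s'\n",
                  cp.hwid, cp.enable_method);
  }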

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/cpu-properties.c | 58 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpu-properties.h | 39 +++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 arch/arm64/kernel/cpu-properties.c
 create mode 100644 arch/arm64/kernel/cpu-properties.h

diff --git a/arch/arm64/kernel/cpu-properties.c b/arch/arm64/kernel/cpu-properties.c
new file mode 100644
index 0000000..e64b34b
--- /dev/null
+++ b/arch/arm64/kernel/cpu-properties.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cpu-properties.h"
+
+int read_cpu_properties(struct cpu_properties *p, const struct device_node *dn)
+{
+	const u32 *cell;
+
+	memset(p, 0, sizeof(*p));
+	p->hwid = INVALID_HWID;
+	p->cpu_release_addr = INVALID_ADDR;
+
+	cell = of_get_property(dn, "reg", NULL);
+
+	if (!cell) {
+		pr_err("%s: Error: %s: invalid reg property\n",
+		       __func__, dn->full_name);
+		return -1;
+	}
+
+	p->hwid = of_read_number(cell,
+		of_n_addr_cells((struct device_node *)dn)) & MPIDR_HWID_BITMASK;
+
+	p->enable_method = of_get_property(dn, "enable-method", NULL);
+
+	if (!p->enable_method) {
+		pr_err("%s: Error: %s: invalid enable-method\n",
+		       __func__, dn->full_name);
+		return -1;
+	}
+
+	if (!strcmp(p->enable_method, "psci")) {
+		p->type = cpu_enable_method_psci;
+		return 0;
+	}
+
+	if (strcmp(p->enable_method, "spin-table")) {
+		p->type = cpu_enable_method_unknown;
+		return -1;
+	}
+
+	p->type = cpu_enable_method_spin_table;
+
+	if (of_property_read_u64(dn, "cpu-release-addr",
+				 &p->cpu_release_addr)) {
+		pr_err("%s: Error: %s: invalid cpu-release-addr property\n",
+		       __func__, dn->full_name);
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/cpu-properties.h b/arch/arm64/kernel/cpu-properties.h
new file mode 100644
index 0000000..b4218ef
--- /dev/null
+++ b/arch/arm64/kernel/cpu-properties.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(__ARM64_CPU_PROPERTIES_H)
+#define __ARM64_CPU_PROPERTIES_H
+
+#include <asm/memory.h>
+#include <asm/cputype.h>
+
+#define INVALID_ADDR UL(~0)
+
+#if !defined(__ASSEMBLY__)
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+enum cpu_enable_method {
+	cpu_enable_method_unknown,
+	cpu_enable_method_psci,
+	cpu_enable_method_spin_table,
+};
+
+struct cpu_properties {
+	u64 hwid;
+	u64 cpu_release_addr;
+	const char *enable_method;
+	enum cpu_enable_method type;
+};
+
+int read_cpu_properties(struct cpu_properties *p, const struct device_node *dn);
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif
-- 
1.9.1

* [PATCH 02/13] arm64/kvm: Fix assembler compatibility of macros
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

Some of the macros defined in kvm_arm.h are useful in the exception vector
routines, but they are not compatible with the assembler.  Change the
definition of ESR_EL2_ISS to be compatible.

Fixes build errors like these when using kvm_arm.h in assembly
source files:

  Error: unexpected characters following instruction at operand 3 -- `add x0,x1,#((1U<<25)-1)'

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/kvm_arm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520..e0e7e64 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -176,7 +176,7 @@
 #define ESR_EL2_EC_SHIFT	(26)
 #define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
 #define ESR_EL2_IL		(1U << 25)
-#define ESR_EL2_ISS		(ESR_EL2_IL - 1)
+#define ESR_EL2_ISS		(0xffff)
 #define ESR_EL2_ISV_SHIFT	(24)
 #define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
 #define ESR_EL2_SAS_SHIFT	(22)
-- 
1.9.1

* [PATCH 07/13] arm64: Add new routine local_disable
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

Add the new arm64 routine local_disable() to allow the masking of several DAIF
flags in one operation.  Currently we only have routines to mask individual
flags; to mask several flags, multiple calls to daifset are required.
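
Masking several flags then becomes a single call, e.g. to mask IRQ, FIQ and
SError together (illustrative usage):

  local_disable(DAIF_IRQ | DAIF_FIQ | DAIF_ASYNC);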

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/irqflags.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 11cc941..28521d4 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -113,5 +113,18 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
 #define local_dbg_enable()	asm("msr	daifclr, #8" : : : "memory")
 #define local_dbg_disable()	asm("msr	daifset, #8" : : : "memory")
 
+enum daif_flag {
+	DAIF_FIQ   = (1UL << 6),
+	DAIF_IRQ   = (1UL << 7),
+	DAIF_ASYNC = (1UL << 8),
+	DAIF_DBG   = (1UL << 9),
+	DAIF_ALL   = (0xfUL << 6),
+};
+
+static inline void local_disable(unsigned long daif_flags)
+{
+	arch_local_irq_restore(daif_flags | arch_local_save_flags());
+}
+
 #endif
 #endif
-- 
1.9.1

* [PATCH 08/13] arm64: Use cpu_ops for smp_stop
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

The current implementation of ipi_cpu_stop() is just a tight infinite loop
around cpu_relax().  An infinite loop is OK if the machine will soon power
off, but it provides no mechanism for bringing a CPU back on-line, nor is it
compatible with a kexec re-boot.

Add a check for a valid cpu_die method in the appropriate cpu_ops structure
and, if a valid method is found, transfer control to that method.  It is
expected that the cpu_die method puts the CPU into a state such that it can be
brought back on-line or can progress through a kexec re-boot.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/smp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4743397..002aa8a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -555,6 +555,15 @@ static void ipi_cpu_stop(unsigned int cpu)
 
 	local_irq_disable();
 
+	/* If we have the cpu ops, use them. */
+
+	if (cpu_ops[cpu]->cpu_disable &&
+	    cpu_ops[cpu]->cpu_die &&
+	    !cpu_ops[cpu]->cpu_disable(cpu))
+		cpu_ops[cpu]->cpu_die(cpu);
+
+	/* Otherwise spin here. */
+
 	while (1)
 		cpu_relax();
 }
-- 
1.9.1

* [PATCH 10/13] arm64/kexec: Revert change to machine_shutdown()
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

Commit 90f51a09ef83 ("arm64: Fix machine_shutdown() definition") changed the
implementation of machine_shutdown() from calling smp_send_stop() to
calling disable_nonboot_cpus().  Revert that part of the commit so that
machine_shutdown() once again calls smp_send_stop().

With the application of another patch in this series (arm64: Use cpu_ops for
smp_stop), smp_send_stop() will do the correct thing for a kexec reboot.  This
change also corrects the source code comment for the machine_shutdown()
routine.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/process.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0a3414b..cd0ae9d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -101,15 +101,12 @@ void arch_cpu_idle_dead(void)
 /*
  * Called by kexec, immediately prior to machine_kexec().
  *
- * This must completely disable all secondary CPUs; simply causing those CPUs
- * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
- * kexec'd kernel to use any and all RAM as it sees fit, without having to
- * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
+ * This must shut down all secondary CPUs.  The functionality
+ * embodied in smp_send_stop() will achieve this.
  */
 void machine_shutdown(void)
 {
-	disable_nonboot_cpus();
+	smp_send_stop();
 }
 
 /*
-- 
1.9.1

* [PATCH 09/13] arm64/kexec: Kexec expects cpu_die
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

The arm64 implementation of kexec expects an operational cpu_die method of
struct cpu_operations, so add defined(CONFIG_KEXEC) to the preprocessor
conditional that enables cpu_die.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/cpu_ops.h | 2 +-
 arch/arm64/kernel/psci.c         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index d7b4b38..9fd7281 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -50,7 +50,7 @@ struct cpu_operations {
 	int		(*cpu_prepare)(unsigned int);
 	int		(*cpu_boot)(unsigned int);
 	void		(*cpu_postboot)(void);
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
 	int		(*cpu_disable)(unsigned int cpu);
 	void		(*cpu_die)(unsigned int cpu);
 	int		(*cpu_kill)(unsigned int cpu);
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 5539547..81dbbc9 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -380,7 +380,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
 static int cpu_psci_cpu_disable(unsigned int cpu)
 {
 	/* Fail early if we don't have CPU_OFF support */
@@ -442,7 +442,7 @@ const struct cpu_operations cpu_psci_ops = {
 	.cpu_init	= cpu_psci_cpu_init,
 	.cpu_prepare	= cpu_psci_cpu_prepare,
 	.cpu_boot	= cpu_psci_cpu_boot,
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
 	.cpu_disable	= cpu_psci_cpu_disable,
 	.cpu_die	= cpu_psci_cpu_die,
 	.cpu_kill	= cpu_psci_cpu_kill,
-- 
1.9.1

* [PATCH 13/13] arm64/kexec: Add kexec_ignore_compat_check param
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

Add the new kernel command line parameter kexec_ignore_compat_check, which
specifies whether or not to ignore the arm64 kexec compatibility checks.

For some systems the compatibility checks may be too restrictive, and setting
this parameter could allow those systems to kexec to kernel images that would
not pass the compatibility checks.
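
For example, a kernel booted with the following command line will skip the
checks (the other parameters are illustrative):

  console=ttyAMA0 root=/dev/vda2 kexec_ignore_compat_check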

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 Documentation/kernel-parameters.txt | 13 +++++++++++++
 arch/arm64/kernel/machine_kexec.c   | 19 +++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5ae8608..c70f4b8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1529,6 +1529,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
+	kexec_ignore_compat_check [KEXEC,ARM64]
+			This parameter specifies whether or not to ignore arm64
+			kexec compatibility checks.  The default is to honor the
+			checks.  Set this parameter to ignore all kexec
+			compatibility checks on arm64 systems.  Setting this
+			could cause the system to become unstable after a kexec
+			re-boot.  If unsure, do not set.
+
+			For some systems the compatibility checks may be too
+			restrictive, and setting this parameter could allow
+			those systems to kexec to kernel images that would not
+			pass the compatibility checks.
+
 	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
 			Format: <Controller#>[,poll interval]
 			The controller # is the number of the ehci usb debug
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 043a3bc..45c2db2 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -36,6 +36,20 @@ extern unsigned long kexec_dtb_addr;
 extern unsigned long kexec_kimage_head;
 extern unsigned long kexec_kimage_start;
 
+/*
+ * kexec_ignore_compat_check - Set to ignore kexec compatibility checks.
+ */
+
+static int __read_mostly kexec_ignore_compat_check;
+
+static int __init setup_kexec_ignore_compat_check(char *__unused)
+{
+	kexec_ignore_compat_check = 1;
+	return 1;
+}
+
+__setup("kexec_ignore_compat_check", setup_kexec_ignore_compat_check);
+
 /**
  * struct kexec_boot_info - Boot info needed by the local kexec routines.
  */
@@ -410,7 +424,8 @@ static int kexec_compat_check(const struct kexec_ctx *ctx)
 			if (cp_1->hwid != cp_2->hwid)
 				continue;
 
-			if (!kexec_cpu_check(cp_1, cp_2))
+			if (!kexec_cpu_check(cp_1, cp_2) &&
+				!kexec_ignore_compat_check)
 				return -EINVAL;
 
 			to_process--;
@@ -506,7 +521,7 @@ int machine_kexec_prepare(struct kimage *image)
 
 	result = kexec_compat_check(ctx);
 
-	if (result)
+	if (result && !kexec_ignore_compat_check)
 		goto on_error;
 
 	kexec_dtb_addr = dtb_seg->mem;
-- 
1.9.1

* [PATCH 11/13] arm64/kexec: Add core kexec support
From: Geoff Levand @ 2014-09-09 22:49 UTC
  To: linux-arm-kernel

Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S, to the
arm64 architecture to support the kexec re-boot mechanism (CONFIG_KEXEC) on
arm64 platforms.
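
As one example of the interfaces added here, the kimage list walker
kexec_list_walk() below takes a callback; an illustrative callback that merely
counts source pages could be written as (a sketch, not part of this patch):

  static void count_source_pages(void *ctx, unsigned int flag,
                                 void *addr, void *dest)
  {
          unsigned long *count = ctx;

          if (flag == IND_SOURCE)
                  (*count)++;
  }

  /* Usage: kexec_list_walk(&count, image->head, count_source_pages); */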

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |   8 +
 arch/arm64/include/asm/kexec.h      |  52 +++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/machine_kexec.c   | 612 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 185 +++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 860 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f0d3a2d..6f0e1f1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -313,6 +313,14 @@ config ARCH_HAS_CACHE_LINE_SIZE
 
 source "mm/Kconfig"
 
+config KEXEC
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..9a3932c
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,52 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_ARM64_KEXEC_H)
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_ARM64
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+#if !defined(__ASSEMBLY__)
+
+struct kimage_arch {
+	void *ctx;
+};
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+}
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index df7ef87..8b7c029 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -29,6 +29,8 @@ arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
 arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o    \
+					   cpu-properties.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..043a3bc
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,612 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/system_misc.h>
+
+#include "cpu-properties.h"
+
+#if defined(DEBUG)
+static const int debug = 1;
+#else
+static const int debug;
+#endif
+
+typedef struct dtb_buffer { char b[0]; } dtb_t;
+
+/* Global variables for the relocate_kernel routine. */
+
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern unsigned long kexec_dtb_addr;
+extern unsigned long kexec_kimage_head;
+extern unsigned long kexec_kimage_start;
+
+/**
+ * struct kexec_boot_info - Boot info needed by the local kexec routines.
+ */
+
+struct kexec_boot_info {
+	unsigned int cpu_count;
+	struct cpu_properties *cp;
+};
+
+/**
+ * struct kexec_ctx - Kexec runtime context.
+ *
+ * @first: Info for the first stage kernel.
+ * @second: Info for the second stage kernel.
+ */
+
+struct kexec_ctx {
+	struct kexec_boot_info first;
+	struct kexec_boot_info second;
+};
+
+static struct kexec_ctx *kexec_image_to_ctx(struct kimage *image)
+{
+	return (struct kexec_ctx *)image->arch.ctx;
+}
+
+static struct kexec_ctx *current_ctx;
+
+static int kexec_ctx_alloc(struct kimage *image)
+{
+	BUG_ON(image->arch.ctx);
+
+	image->arch.ctx = kmalloc(sizeof(struct kexec_ctx), GFP_KERNEL);
+
+	if (!image->arch.ctx)
+		return -ENOMEM;
+
+	current_ctx = (struct kexec_ctx *)image->arch.ctx;
+
+	return 0;
+}
+
+static void kexec_ctx_free(struct kexec_ctx *ctx)
+{
+	if (!ctx)
+		return;
+
+	kfree(ctx->first.cp);
+	ctx->first.cp = NULL;
+
+	kfree(ctx->second.cp);
+	ctx->second.cp = NULL;
+
+	kfree(ctx);
+}
+
+static void kexec_ctx_clean(struct kimage *image)
+{
+	kexec_ctx_free(image->arch.ctx);
+	image->arch.ctx = NULL;
+}
+
+/**
+ * kexec_is_dtb - Helper routine to check the device tree header signature.
+ */
+
+static bool kexec_is_dtb(__be32 magic)
+{
+	return be32_to_cpu(magic) == OF_DT_HEADER;
+}
+
+/**
+ * kexec_is_dtb_user - For debugging output.
+ */
+
+static bool kexec_is_dtb_user(const dtb_t *dtb)
+{
+	__be32 magic;
+
+	return get_user(magic, (__be32 *)dtb) ? false : kexec_is_dtb(magic);
+}
+
+/**
+ * kexec_list_walk - Helper to walk the kimage page list.
+ */
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void kexec_list_walk(void *ctx, unsigned long kimage_head,
+	void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
+{
+	void *dest;
+	unsigned long *entry;
+
+	for (entry = &kimage_head, dest = NULL; ; entry++) {
+		unsigned int flag = *entry & IND_FLAGS;
+		void *addr = phys_to_virt(*entry & PAGE_MASK);
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (unsigned long *)addr - 1;
+			cb(ctx, flag, addr, NULL);
+			break;
+		case IND_DESTINATION:
+			dest = addr;
+			cb(ctx, flag, addr, NULL);
+			break;
+		case IND_SOURCE:
+			cb(ctx, flag, addr, dest);
+			dest += PAGE_SIZE;
+			break;
+		case IND_DONE:
+			cb(ctx, flag, NULL, NULL);
+			return;
+		default:
+			pr_devel("%s:%d unknown flag %xh\n", __func__, __LINE__,
+				flag);
+			cb(ctx, flag, addr, NULL);
+			break;
+		}
+	}
+}
+
+/**
+ * kexec_image_info - For debugging output.
+ */
+
+#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
+static void _kexec_image_info(const char *func, int line,
+	const struct kimage *image)
+{
+	if (debug) {
+		unsigned long i;
+
+		pr_devel("%s:%d:\n", func, line);
+		pr_devel("  kexec image info:\n");
+		pr_devel("    type:        %d\n", image->type);
+		pr_devel("    start:       %lx\n", image->start);
+		pr_devel("    head:        %lx\n", image->head);
+		pr_devel("    nr_segments: %lu\n", image->nr_segments);
+
+		for (i = 0; i < image->nr_segments; i++) {
+			pr_devel("      segment[%lu]: %016lx - %016lx, "
+				"%lxh bytes, %lu pages\n",
+				i,
+				image->segment[i].mem,
+				image->segment[i].mem + image->segment[i].memsz,
+				image->segment[i].memsz,
+				image->segment[i].memsz /  PAGE_SIZE);
+
+			if (kexec_is_dtb_user(image->segment[i].buf))
+				pr_devel("        dtb segment\n");
+		}
+	}
+}
+
+/**
+ * kexec_find_dtb_seg - Helper routine to find the dtb segment.
+ */
+
+static const struct kexec_segment *kexec_find_dtb_seg(
+	const struct kimage *image)
+{
+	int i;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		if (kexec_is_dtb_user(image->segment[i].buf))
+			return &image->segment[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * kexec_copy_dtb - Helper routine to copy dtb from user space.
+ */
+
+static int kexec_copy_dtb(const struct kexec_segment *seg, dtb_t **dtb)
+{
+	int result;
+
+	BUG_ON(!seg || !seg->bufsz);
+
+	*dtb = kmalloc(seg->bufsz, GFP_KERNEL);
+
+	if (!*dtb) {
+		pr_err("%s: Error: Out of memory.", __func__);
+		return -ENOMEM;
+	}
+
+	result = copy_from_user(*dtb, seg->buf, seg->bufsz);
+
+	if (result) {
+		pr_err("%s: Error: copy_from_user failed.", __func__);
+		kfree(*dtb);
+		*dtb = NULL;
+	}
+
+	return result;
+}
+
+/**
+ * kexec_cpu_info_init - Initialize an array of kexec_cpu_info structures.
+ *
+ * Allocates a cpu info array and fills it with info for all cpus found in
+ * the device tree passed.
+ */
+
+static int kexec_cpu_info_init(const struct device_node *dn,
+	struct kexec_boot_info *info)
+{
+	int result;
+	unsigned int cpu;
+
+	info->cp = kmalloc(
+		info->cpu_count * sizeof(*info->cp), GFP_KERNEL);
+
+	if (!info->cp) {
+		pr_err("%s: Error: Out of memory.", __func__);
+		return -ENOMEM;
+	}
+
+	for (cpu = 0; cpu < info->cpu_count; cpu++) {
+		struct cpu_properties *cp = &info->cp[cpu];
+
+		dn = of_find_node_by_type((struct device_node *)dn, "cpu");
+
+		if (!dn) {
+			pr_devel("%s:%d: bad node\n", __func__, __LINE__);
+			goto on_error;
+		}
+
+		result = read_cpu_properties(cp, dn);
+
+		if (result) {
+			pr_devel("%s:%d: bad node\n", __func__, __LINE__);
+			goto on_error;
+		}
+
+		if (cp->type == cpu_enable_method_psci)
+			pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s'\n",
+				__func__, __LINE__, cpu, cp->hwid,
+				cp->enable_method);
+		else
+			pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s', "
+				"cpu-release-addr %llx\n",
+				__func__, __LINE__, cpu, cp->hwid,
+				cp->enable_method,
+				cp->cpu_release_addr);
+	}
+
+	return 0;
+
+on_error:
+	kfree(info->cp);
+	info->cp = NULL;
+	return -EINVAL;
+}
+
+/**
+ * kexec_boot_info_init - Initialize a kexec_boot_info structure from a dtb.
+ */
+
+static int kexec_boot_info_init(struct kexec_boot_info *info, dtb_t *dtb)
+{
+	struct device_node *dn;
+	struct device_node *i;
+
+	if (!dtb) {
+		/* 1st stage. */
+		dn = NULL;
+	} else {
+		/* 2nd stage. */
+		of_fdt_unflatten_tree((void *)dtb, &dn);
+
+		if (!dn) {
+			pr_err("%s: Error: of_fdt_unflatten_tree failed.\n",
+				__func__);
+			return -EINVAL;
+		}
+	}
+
+	for (info->cpu_count = 0, i = dn; (i = of_find_node_by_type(i, "cpu"));
+		info->cpu_count++)
+		(void)0;
+
+	pr_devel("%s:%d: cpu_count: %u\n", __func__, __LINE__, info->cpu_count);
+
+	if (!info->cpu_count) {
+		pr_err("%s: Error: No cpu nodes found in device tree.\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return kexec_cpu_info_init(dn, info);
+}
+
+/**
+ * kexec_cpu_check - Helper to check compatibility of the 2nd stage kernel.
+ *
+ * Returns true if everything is OK.
+ */
+
+static bool kexec_cpu_check(struct cpu_properties *cp_1,
+	struct cpu_properties *cp_2)
+{
+	if (debug)
+		BUG_ON(cp_1->hwid != cp_2->hwid);
+
+	if (cp_1->type != cpu_enable_method_psci &&
+		cp_1->type != cpu_enable_method_spin_table) {
+		pr_err("%s:%d: hwid-%llx: Error: "
+			"Unknown enable method: %s.\n", __func__, __LINE__,
+			cp_1->hwid, cp_1->enable_method);
+		return false;
+	}
+
+	if (cp_2->type != cpu_enable_method_psci &&
+		cp_2->type != cpu_enable_method_spin_table) {
+		pr_err("%s:%d: hwid-%llx: Error: "
+			"Unknown enable method: %s.\n", __func__, __LINE__,
+			cp_2->hwid, cp_2->enable_method);
+		return false;
+	}
+
+	if (cp_1->type != cp_2->type) {
+		pr_err("%s:%d: hwid-%llx: Error: "
+			"Enable method mismatch: %s != %s.\n", __func__,
+			__LINE__, cp_1->hwid, cp_1->enable_method,
+			cp_2->enable_method);
+		return false;
+	}
+
+	if (cp_1->type == cpu_enable_method_spin_table) {
+		if (cp_1->cpu_release_addr != cp_2->cpu_release_addr) {
+			pr_err("%s:%d: hwid-%llx: Error: "
+				"cpu-release-addr mismatch %llx != %llx.\n",
+				__func__, __LINE__, cp_1->hwid,
+				cp_1->cpu_release_addr,
+				cp_2->cpu_release_addr);
+			return false;
+		}
+	}
+
+	pr_devel("%s:%d: hwid-%llx: OK\n", __func__, __LINE__, cp_1->hwid);
+
+	return true;
+}
+
+/**
+ * kexec_compat_check - Iterator for kexec_cpu_check.
+ */
+
+static int kexec_compat_check(const struct kexec_ctx *ctx)
+{
+	unsigned int cpu_1;
+	unsigned int to_process;
+
+	to_process = min(ctx->first.cpu_count, ctx->second.cpu_count);
+
+	if (ctx->first.cpu_count != ctx->second.cpu_count)
+		pr_warn("%s: Warning: CPU count mismatch %u != %u.\n",
+			__func__, ctx->first.cpu_count, ctx->second.cpu_count);
+
+	for (cpu_1 = 0; cpu_1 < ctx->first.cpu_count; cpu_1++) {
+		unsigned int cpu_2;
+		struct cpu_properties *cp_1 = &ctx->first.cp[cpu_1];
+
+		for (cpu_2 = 0; cpu_2 < ctx->second.cpu_count; cpu_2++) {
+			struct cpu_properties *cp_2 = &ctx->second.cp[cpu_2];
+
+			if (cp_1->hwid != cp_2->hwid)
+				continue;
+
+			if (!kexec_cpu_check(cp_1, cp_2))
+				return -EINVAL;
+
+			to_process--;
+		}
+	}
+
+	if (to_process) {
+		pr_warn("%s: Warning: Failed to process %u CPUs.\n", __func__,
+			to_process);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	kexec_ctx_clean(image);
+}
+
+/**
+ * kexec_check_cpu_die - Check if cpu_die() will work on secondary processors.
+ */
+
+static int kexec_check_cpu_die(void)
+{
+	unsigned int cpu;
+	unsigned int sum = 0;
+
+	/*
+	 * The loop covers all CPUs for simplicity; the cpu && term below
+	 * skips the primary CPU, which need not be hot un-plugged.
+	 */
+
+	for_each_cpu(cpu, cpu_all_mask) {
+		if (cpu && (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_disable ||
+			cpu_ops[cpu]->cpu_disable(cpu))) {
+			sum++;
+			pr_err("%s: Error: "
+				"CPU %u does not support hot un-plug.\n",
+				__func__, cpu);
+		}
+	}
+
+	return sum ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	int result;
+	dtb_t *dtb = NULL;
+	struct kexec_ctx *ctx;
+	const struct kexec_segment *dtb_seg;
+
+	kexec_image_info(image);
+
+	result = kexec_check_cpu_die();
+
+	if (result)
+		goto on_error;
+
+	result = kexec_ctx_alloc(image);
+
+	if (result)
+		goto on_error;
+
+	ctx = kexec_image_to_ctx(image);
+
+	result = kexec_boot_info_init(&ctx->first, NULL);
+
+	if (result)
+		goto on_error;
+
+	dtb_seg = kexec_find_dtb_seg(image);
+
+	if (!dtb_seg) {
+		result = -EINVAL;
+		goto on_error;
+	}
+
+	result = kexec_copy_dtb(dtb_seg, &dtb);
+
+	if (result)
+		goto on_error;
+
+	result = kexec_boot_info_init(&ctx->second, dtb);
+
+	if (result)
+		goto on_error;
+
+	result = kexec_compat_check(ctx);
+
+	if (result)
+		goto on_error;
+
+	kexec_dtb_addr = dtb_seg->mem;
+	kexec_kimage_start = image->start;
+
+	goto on_exit;
+
+on_error:
+	kexec_ctx_clean(image);
+on_exit:
+	kfree(dtb);
+	return result;
+}
+
+/**
+ * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
+ */
+
+static void kexec_list_flush_cb(void *ctx, unsigned int flag,
+	void *addr, void *dest)
+{
+	switch (flag) {
+	case IND_INDIRECTION:
+	case IND_SOURCE:
+		__flush_dcache_area(addr, PAGE_SIZE);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+
+void machine_kexec(struct kimage *image)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+	struct kexec_ctx *ctx = kexec_image_to_ctx(image);
+
+	BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
+	BUG_ON(num_online_cpus() > 1);
+	BUG_ON(!ctx);
+
+	kexec_image_info(image);
+
+	kexec_kimage_head = image->head;
+
+	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
+	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+	pr_devel("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
+		(void *)image->control_code_page);
+	pr_devel("%s:%d: reboot_code_buffer_phys:  %p\n", __func__, __LINE__,
+		(void *)reboot_code_buffer_phys);
+	pr_devel("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
+		reboot_code_buffer);
+	pr_devel("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
+		relocate_new_kernel);
+	pr_devel("%s:%d: relocate_new_kernel_size: %lxh(%lu) bytes\n", __func__,
+		__LINE__, relocate_new_kernel_size, relocate_new_kernel_size);
+
+	pr_devel("%s:%d: kexec_dtb_addr:           %p\n", __func__, __LINE__,
+		(void *)kexec_dtb_addr);
+	pr_devel("%s:%d: kexec_kimage_head:        %p\n", __func__, __LINE__,
+		(void *)kexec_kimage_head);
+	pr_devel("%s:%d: kexec_kimage_start:       %p\n", __func__, __LINE__,
+		(void *)kexec_kimage_start);
+
+	/*
+	 * Copy relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+		relocate_new_kernel_size);
+
+	/* Assure reboot_code_buffer is copied. */
+
+	mb();
+
+	pr_info("Bye!\n");
+
+	local_disable(DAIF_ALL);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+
+	__flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+
+	kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
+
+	soft_restart(reboot_code_buffer_phys);
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..92aba9d
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,185 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+/* The list entry flags. */
+
+#define IND_DESTINATION_BIT 0
+#define IND_INDIRECTION_BIT 1
+#define IND_DONE_BIT        2
+#define IND_SOURCE_BIT      3
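+
+/*
+ * These are the bit positions of the IND_* flags defined in
+ * include/linux/kexec.h: IND_DESTINATION = 0x1, IND_INDIRECTION = 0x2,
+ * IND_DONE = 0x4 and IND_SOURCE = 0x8.
+ */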
+
+/*
+ * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new kernel to its final location.  To assure that the relocate_new_kernel
+ * routine which does that copy is not overwritten, all code and data needed
+ * by relocate_new_kernel must be between the symbols relocate_new_kernel and
+ * relocate_new_kernel_end.  The machine_kexec() routine will copy
+ * relocate_new_kernel to the kexec control_code_page, a special page which
+ * has been set up to be preserved during the kernel copy operation.
+ */
+
+.align 3
+
+.globl relocate_new_kernel
+relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+
+	ldr	x10, kexec_kimage_head		/* x10 = list entry */
+
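+	/*
+	 * CTR_EL0.DminLine (bits 19:16) is the log2 of the number of
+	 * 4-byte words in the smallest data cache line, so the line
+	 * size in bytes is 4 << DminLine.
+	 */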
+	mrs	x0, ctr_el0
+	ubfm	x0, x0, #16, #19
+	mov	x11, #4
+	lsl	x11, x11, x0			/* x11 = dcache line size */
+
+	mov	x12, xzr			/* x12 = segment start */
+	mov	x13, xzr			/* x13 = entry ptr */
+	mov	x14, xzr			/* x14 = copy dest */
+
+	/* Check if the new kernel needs relocation. */
+
+	cbz	x10, .Ldone
+	tbnz	x10, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x15, x10, PAGE_MASK		/* x15 = addr */
+
+	/* Test the entry flags. */
+
+.Ltest_source:
+	tbz	x10, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* copy_page(x20 = dest, x21 = src) */
+
+	mov	x20, x14
+	mov	x21, x15
+
+1:	ldp	x22, x23, [x21]
+	ldp	x24, x25, [x21, #16]
+	ldp	x26, x27, [x21, #32]
+	ldp	x28, x29, [x21, #48]
+	add	x21, x21, #64
+	stnp	x22, x23, [x20]
+	stnp	x24, x25, [x20, #16]
+	stnp	x26, x27, [x20, #32]
+	stnp	x28, x29, [x20, #48]
+	add	x20, x20, #64
+	tst	x21, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	/* dest += PAGE_SIZE */
+
+	add	x14, x14, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x10, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+
+	mov	x13, x15
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x10, IND_DESTINATION_BIT, .Lnext
+
+	/* flush segment */
+
+	bl	.Lflush
+	mov	x12, x15
+
+	/* dest = addr */
+
+	mov	x14, x15
+
+.Lnext:
+	/* entry = *ptr++ */
+
+	ldr	x10, [x13]
+	add	x13, x13, #8
+
+	/* while (!(entry & DONE)) */
+
+	tbz	x10, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	/* flush last segment */
+
+	bl	.Lflush
+
+	dsb	sy
+	ic	ialluis
+	dsb	sy
+	isb
+
+	/* start_new_kernel */
+
+	ldr	x4, kexec_kimage_start
+	ldr	x0, kexec_dtb_addr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x4
+
+/* flush - x11 = line size, x12 = start addr, x14 = end addr. */
+
+.Lflush:
+	cbz	x12, 2f
+	mov	x0, x12
+	sub	x1, x11, #1
+	bic	x0, x0, x1
+1:	dc	civac, x0
+	add	x0, x0, x11
+	cmp	x0, x14
+	b.lo	1b
+2:	ret
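
In C terms, .Lflush is a line-wise clean+invalidate of [x12, x14).  A rough,
host-runnable rendering of its logic, where dc_civac() is a hypothetical
stand-in that only reports the line address:

  #include <stdint.h>
  #include <stdio.h>

  /* Stand-in for the "dc civac" instruction. */
  static void dc_civac(uintptr_t addr)
  {
          printf("clean+invalidate to PoC: %#lx\n", (unsigned long)addr);
  }

  int main(void)
  {
          uintptr_t line  = 64;          /* x11: dcache line size */
          uintptr_t start = 0x80001234;  /* x12: segment start, 0 = nothing */
          uintptr_t end   = 0x80001400;  /* x14: one past the segment end */
          uintptr_t addr;

          if (!start)                    /* cbz x12, 2f */
                  return 0;

          /* bic: align down to a line boundary, then walk up (b.lo). */
          for (addr = start & ~(line - 1); addr < end; addr += line)
                  dc_civac(addr);

          return 0;
  }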
+
+.align 3
+
+/* The machine_kexec routines set these variables. */
+
+/*
+ * kexec_dtb_addr - Physical address of the new kernel's device tree.
+ */
+
+.globl kexec_dtb_addr
+kexec_dtb_addr:
+	.quad	0x0
+
+/*
+ * kexec_kimage_head - Copy of image->head, the list of kimage entries.
+ */
+
+.globl kexec_kimage_head
+kexec_kimage_head:
+	.quad	0x0
+
+/*
+ * kexec_kimage_start - Copy of image->start, the entry point of the new kernel.
+ */
+
+.globl kexec_kimage_start
+kexec_kimage_start:
+	.quad	0x0
+
+.Lrelocate_new_kernel_end:
+
+/*
+ * relocate_new_kernel_size - Number of bytes to copy to the control_code_page.
+ */
+
+.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.quad .Lrelocate_new_kernel_end - relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 6925f5b..04626b9 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_ARM64   (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 12/13] arm64/kexec: Enable kexec in the arm64 defconfig
  2014-09-09 22:51 ` Geoff Levand
@ 2014-09-09 22:49   ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-09 22:49 UTC (permalink / raw)
  To: linux-arm-kernel

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d92ef3c..ebf8b3f 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -39,6 +39,7 @@ CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_KEXEC=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 08/13] arm64: Use cpu_ops for smp_stop
@ 2014-09-09 22:49   ` Geoff Levand
  0 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-09 22:49 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: marc.zyngier, kexec, linux-arm-kernel, christoffer.dall

The current implementation of ipi_cpu_stop() is just a tight infinite loop
around cpu_relax().  This infinite loop implementation is OK if the machine
will soon do a poweroff, but it doesn't have any mechanism to allow a CPU
to be brought back on-line, nor is it compatible with kexec re-boot.

Add a check for a valid cpu_die method of the appropriate cpu_ops structure,
and if a valid method is found, transfer control to that method.  It is
expected that the cpu_die method puts the CPU into a state such that it can
be brought back on-line or progress through a kexec re-boot.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/smp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4743397..002aa8a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -555,6 +555,15 @@ static void ipi_cpu_stop(unsigned int cpu)
 
 	local_irq_disable();
 
+	/* If we have the cpu ops, use them. */
+
+	if (cpu_ops[cpu]->cpu_disable &&
+	    cpu_ops[cpu]->cpu_die &&
+	    !cpu_ops[cpu]->cpu_disable(cpu))
+		cpu_ops[cpu]->cpu_die(cpu);
+
+	/* Otherwise spin here. */
+
 	while (1)
 		cpu_relax();
 }
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 10/13] arm64/kexec: Revert change to machine_shutdown()
@ 2014-09-09 22:49   ` Geoff Levand
  0 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-09 22:49 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: marc.zyngier, kexec, linux-arm-kernel, christoffer.dall

Commit 90f51a09ef83 ("arm64: Fix machine_shutdown() definition") changed the
implementation of machine_shutdown() from calling smp_send_stop() to
calling disable_nonboot_cpus().  Revert that part of the commit so that
machine_shutdown() once again calls smp_send_stop().

With the application of another patch in this series (arm64: Use cpu_ops for
smp_stop), smp_send_stop() will do the correct thing for a kexec reboot. This
change also corrects the source code comment for the machine_shutdown() routine.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/process.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0a3414b..cd0ae9d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -101,15 +101,12 @@ void arch_cpu_idle_dead(void)
 /*
  * Called by kexec, immediately prior to machine_kexec().
  *
- * This must completely disable all secondary CPUs; simply causing those CPUs
- * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
- * kexec'd kernel to use any and all RAM as it sees fit, without having to
- * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
+ * This must shut down all secondary CPUs.  The functionality
+ * embodied in smp_send_stop() will achieve this.
  */
 void machine_shutdown(void)
 {
-	disable_nonboot_cpus();
+	smp_send_stop();
 }
 
 /*
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 09/13] arm64/kexec: Kexec expects cpu_die
@ 2014-09-09 22:49   ` Geoff Levand
  0 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-09 22:49 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: marc.zyngier, kexec, linux-arm-kernel, christoffer.dall

The arm64 implementation of kexec expects an operational cpu_die method of
struct cpu_operations, so add defined(CONFIG_KEXEC) to the preprocessor
conditional that enables cpu_die.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/cpu_ops.h | 2 +-
 arch/arm64/kernel/psci.c         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index d7b4b38..9fd7281 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -50,7 +50,7 @@ struct cpu_operations {
 	int		(*cpu_prepare)(unsigned int);
 	int		(*cpu_boot)(unsigned int);
 	void		(*cpu_postboot)(void);
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
 	int		(*cpu_disable)(unsigned int cpu);
 	void		(*cpu_die)(unsigned int cpu);
 	int		(*cpu_kill)(unsigned int cpu);
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 5539547..81dbbc9 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -380,7 +380,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
 static int cpu_psci_cpu_disable(unsigned int cpu)
 {
 	/* Fail early if we don't have CPU_OFF support */
@@ -442,7 +442,7 @@ const struct cpu_operations cpu_psci_ops = {
 	.cpu_init	= cpu_psci_cpu_init,
 	.cpu_prepare	= cpu_psci_cpu_prepare,
 	.cpu_boot	= cpu_psci_cpu_boot,
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
 	.cpu_disable	= cpu_psci_cpu_disable,
 	.cpu_die	= cpu_psci_cpu_die,
 	.cpu_kill	= cpu_psci_cpu_kill,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 13/13] arm64/kexec: Add kexec_ignore_compat_check param
@ 2014-09-09 22:49   ` Geoff Levand
  0 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-09 22:49 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: marc.zyngier, kexec, linux-arm-kernel, christoffer.dall

Add the new kernel command line parameter kexec_ignore_compat_check, which
specifies whether or not to ignore arm64 kexec compatibility checks.

For some systems the compatibility checks may be too restrictive, and setting
this parameter could allow those systems to kexec to kernel images that would
not pass the compatibility checks.
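
For example, assuming a U-Boot style environment (the variable and values
shown are illustrative only), the parameter is appended to the kernel command
line like any other boot option:

  setenv bootargs "console=ttyAMA0 root=/dev/vda1 kexec_ignore_compat_check"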

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 Documentation/kernel-parameters.txt | 13 +++++++++++++
 arch/arm64/kernel/machine_kexec.c   | 19 +++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5ae8608..c70f4b8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1529,6 +1529,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
+	kexec_ignore_compat_check [KEXEC,ARM64]
+			This parameter specifies whether or not to ignore arm64
+			kexec compatibility checks.  The default is to honor the
+			checks.  Set this parameter to ignore all kexec
+			compatibility checks on arm64 systems.  Setting this
+			could cause the system to become unstable after a kexec
+			re-boot.  If unsure, do not set.
+
+			For some systems the compatibility checks may be too
+			restrictive, and setting this parameter could allow
+			those systems to kexec to kernel images that would not
+			pass the compatibility checks.
+
 	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
 			Format: <Controller#>[,poll interval]
 			The controller # is the number of the ehci usb debug
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 043a3bc..45c2db2 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -36,6 +36,20 @@ extern unsigned long kexec_dtb_addr;
 extern unsigned long kexec_kimage_head;
 extern unsigned long kexec_kimage_start;
 
+/*
+ * kexec_ignore_compat_check - Set to ignore kexec compatibility checks.
+ */
+
+static int __read_mostly kexec_ignore_compat_check;
+
+static int __init setup_kexec_ignore_compat_check(char *__unused)
+{
+	kexec_ignore_compat_check = 1;
+	return 1;
+}
+
+__setup("kexec_ignore_compat_check", setup_kexec_ignore_compat_check);
+
 /**
  * struct kexec_boot_info - Boot info needed by the local kexec routines.
  */
@@ -410,7 +424,8 @@ static int kexec_compat_check(const struct kexec_ctx *ctx)
 			if (cp_1->hwid != cp_2->hwid)
 				continue;
 
-			if (!kexec_cpu_check(cp_1, cp_2))
+			if (!kexec_cpu_check(cp_1, cp_2) &&
+				!kexec_ignore_compat_check)
 				return -EINVAL;
 
 			to_process--;
@@ -506,7 +521,7 @@ int machine_kexec_prepare(struct kimage *image)
 
 	result = kexec_compat_check(ctx);
 
-	if (result)
+	if (result && !kexec_ignore_compat_check)
 		goto on_error;
 
 	kexec_dtb_addr = dtb_seg->mem;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 00/13] arm64 kexec kernel patches V2
@ 2014-09-09 22:51 ` Geoff Levand
  0 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-09 22:51 UTC (permalink / raw)
  To: linux-arm-kernel

Hi All,

This series adds the core support for kexec re-boots on arm64.  I have tested
with the ARM VE fast model using various kernel config options for both the
first and second stage kernels.

To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
kexec-tools [2], which have not yet been merged upstream, are needed.

This series does not include the re-work of the spin-table CPU enable method
that is needed to support kexec, nor does it include the re-work of KVM to support
CPU soft reset.  A kernel built with these patches will boot and run correctly,
but will fail to load a kexec kernel if running on a machine with any spin-table
enabled CPUs and will fail the kexec re-boot if the first stage kernel was built
with CONFIG_KVM=y.  Work-in-progress patches to support these are in the master
branch of my linux-kexec repository [1].

Patches 1-5 rework the arm64 hcall mechanism to give the arm64 soft_restart()
routine the ability to switch exception levels from EL1 to EL2 for kernels that
were entered in EL2.

Patches 6 and 7 add two new arm64 generic routines read_cpu_properties() and
local_disable() that the kexec code uses.

Patches 8-10 fix up machine_shutdown() to work correctly for a kexec re-boot.
The implementation relies on the effects of smp_send_stop() to shut down the
secondary processors of an SMP system via the cpu_disable and cpu_die methods of
the arm64 cpu_ops structure.

Patches 11 and 12 add the actual kexec support.

Patch 13 adds a kernel command line parameter kexec_ignore_compat_check for
expert users to give more control over loading second stage kernels.

Please consider all patches for inclusion.  Any comments or suggestions on how 
to improve are welcome.

[1]  https://git.linaro.org/people/geoff.levand/linux-kexec.git
[2]  https://git.linaro.org/people/geoff.levand/kexec-tools.git

-Geoff

The following changes since commit e3672649faae400e8a598938766a63f395a27ae6:

  arm64: defconfig: increase NR_CPUS default to 64 (2014-09-08 14:55:27 +0100)

are available in the git repository at:

  git://git.linaro.org/people/geoff.levand/linux-kexec.git kexec-v2

for you to fetch changes up to 99f09410a26c001efd06c270aa2e348d6cac6b01:

  arm64/kexec: Add kexec_ignore_compat_check param (2014-09-09 15:33:48 -0700)

Geoff Levand (13):
  arm64: Add ESR_EL2_EC macros to hyp-stub
  arm64/kvm: Fix assembler compatibility of macros
  arm64: Convert hcalls to use ISS field
  arm64: Add new hcall HVC_CALL_FUNC
  arm64: Add EL2 switch to soft_restart
  arm64: Add new routine read_cpu_properties
  arm64: Add new routine local_disable
  arm64: Use cpu_ops for smp_stop
  arm64/kexec: Kexec expects cpu_die
  arm64/kexec: Revert change to machine_shutdown()
  arm64/kexec: Add core kexec support
  arm64/kexec: Enable kexec in the arm64 defconfig
  arm64/kexec: Add kexec_ignore_compat_check param

 Documentation/kernel-parameters.txt |  13 +
 arch/arm64/Kconfig                  |   8 +
 arch/arm64/configs/defconfig        |   1 +
 arch/arm64/include/asm/cpu_ops.h    |   2 +-
 arch/arm64/include/asm/irqflags.h   |  13 +
 arch/arm64/include/asm/kexec.h      |  52 +++
 arch/arm64/include/asm/kvm_arm.h    |   2 +-
 arch/arm64/include/asm/proc-fns.h   |   4 +-
 arch/arm64/include/asm/virt.h       |  31 ++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/cpu-properties.c  |  58 ++++
 arch/arm64/kernel/cpu-properties.h  |  39 +++
 arch/arm64/kernel/hyp-stub.S        |  51 ++-
 arch/arm64/kernel/machine_kexec.c   | 627 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c         |  15 +-
 arch/arm64/kernel/psci.c            |   4 +-
 arch/arm64/kernel/relocate_kernel.S | 185 +++++++++++
 arch/arm64/kernel/smp.c             |   9 +
 arch/arm64/kvm/hyp.S                |  19 +-
 arch/arm64/mm/proc.S                |  47 ++-
 include/uapi/linux/kexec.h          |   1 +
 21 files changed, 1139 insertions(+), 44 deletions(-)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/cpu-properties.c
 create mode 100644 arch/arm64/kernel/cpu-properties.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

-- 
1.9.1

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 02/13] arm64/kvm: Fix assembler compatibility of macros
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-10  8:40     ` Ard Biesheuvel
  -1 siblings, 0 replies; 80+ messages in thread
From: Ard Biesheuvel @ 2014-09-10  8:40 UTC (permalink / raw)
  To: linux-arm-kernel

On 10 September 2014 00:49, Geoff Levand <geoff@infradead.org> wrote:
> Some of the macros defined in kvm_arm.h are useful in the exception vector
> routines, but they are not compatible with the assembler.  Change the
> definition of ESR_EL2_ISS to be compatible.
>
> Fixes build errors like these when using kvm_arm.h in assembly
> source files:
>
>   Error: unexpected characters following instruction at operand 3 -- `add x0,x1,#((1U<<25)-1)'
>
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/kvm_arm.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index cc83520..e0e7e64 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -176,7 +176,7 @@
>  #define ESR_EL2_EC_SHIFT       (26)
>  #define ESR_EL2_EC             (0x3fU << ESR_EL2_EC_SHIFT)
>  #define ESR_EL2_IL             (1U << 25)
> -#define ESR_EL2_ISS            (ESR_EL2_IL - 1)
> +#define ESR_EL2_ISS            (0xffff)

Don't you mean 0x1ffffff?
And, there is a macro UL() for this purpose, so I suppose you could
redefine ESR_EL2_IL as (UL(1) << 25) as well. I know it is not
strictly the same thing, but it should be good enough as this is arm64
only
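
The reason gas rejects the C suffix, and why UL() helps, is easiest to
see from the shape of the helper. A minimal sketch of the kernel's
_AC()/UL() pattern follows; it shows the idea, not a verbatim copy of
the arm64 header of this era:

  /*
   * Sketch of the kernel's _AC()/UL() pattern: the same constant
   * expands with a C suffix for the compiler and without one for
   * the assembler, which does not understand suffixes like U/UL.
   */
  #ifdef __ASSEMBLY__
  #define _AC(X, Y)   X
  #else
  #define __AC(X, Y)  (X##Y)
  #define _AC(X, Y)   __AC(X, Y)
  #endif

  #define UL(x)       _AC(x, UL)

  /* (UL(1) << 25) assembles as (1 << 25), compiles as (1UL << 25). */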

>  #define ESR_EL2_ISV_SHIFT      (24)
>  #define ESR_EL2_ISV            (1U << ESR_EL2_ISV_SHIFT)
>  #define ESR_EL2_SAS_SHIFT      (22)
> --
> 1.9.1
>
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 02/13] arm64/kvm: Fix assembler compatibility of macros
  2014-09-10  8:40     ` Ard Biesheuvel
@ 2014-09-10 16:35       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-10 16:35 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2014-09-10 at 10:40 +0200, Ard Biesheuvel wrote:
> On 10 September 2014 00:49, Geoff Levand <geoff@infradead.org> wrote:
> > Some of the macros defined in kvm_arm.h are useful in the exception vector
> > routines, but they are not compatible with the assembler.  Change the
> > definition of ESR_EL2_ISS to be compatible.
> >
> > Fixes build errors like these when using kvm_arm.h in assembly
> > source files:
> >
> >   Error: unexpected characters following instruction at operand 3 -- `add x0,x1,#((1U<<25)-1)'
> >
> > Signed-off-by: Geoff Levand <geoff@infradead.org>
> > ---
> >  arch/arm64/include/asm/kvm_arm.h | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> > index cc83520..e0e7e64 100644
> > --- a/arch/arm64/include/asm/kvm_arm.h
> > +++ b/arch/arm64/include/asm/kvm_arm.h
> > @@ -176,7 +176,7 @@
> >  #define ESR_EL2_EC_SHIFT       (26)
> >  #define ESR_EL2_EC             (0x3fU << ESR_EL2_EC_SHIFT)
> >  #define ESR_EL2_IL             (1U << 25)
> > -#define ESR_EL2_ISS            (ESR_EL2_IL - 1)
> > +#define ESR_EL2_ISS            (0xffff)
> 
> Don't you mean 0x1ffffff?

Hcalls have a 16-bit 'payload'. The upper bits of the ISS field are
specified as zero by the architecture for HVC, so masking with 0xffff
gives the same result as masking with 0x1ffffff.
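
To see the equivalence concretely, here is a minimal stand-alone
sketch (not from the series) that models the ESR_EL2 value an
"hvc #4" would produce:

  #include <assert.h>
  #include <stdio.h>

  #define ESR_EL2_EC_SHIFT  26
  #define ESR_EL2_EC_HVC64  0x16UL
  #define ESR_ISS_MASK      0x1ffffffUL  /* architectural ISS, bits [24:0] */
  #define HVC_IMM_MASK      0xffffUL     /* 16-bit HVC immediate */

  int main(void)
  {
          /* ESR_EL2 for "hvc #4": EC=HVC64, IL=1, ISS=imm16. */
          unsigned long esr = (ESR_EL2_EC_HVC64 << ESR_EL2_EC_SHIFT)
                              | (1UL << 25) | 4;

          /* ISS bits [24:16] are zero for HVC, so both masks agree. */
          assert((esr & HVC_IMM_MASK) == (esr & ESR_ISS_MASK));
          printf("hcall number: %lu\n", esr & HVC_IMM_MASK);
          return 0;
  }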

> And, there is a macro UL() for this purpose, so I suppose you could
> redefine ESR_EL2_IL as (UL(1) << 25) as well. I know it is not
> strictly the same thing, but it should be good enough as this is arm64
> only

Sure that will be OK.  The one other use of ESR_EL2_IL will promote
the operation to unsigned long without ill effect.  I'll prepare an
updated patch.

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-10 17:07     ` Will Deacon
  -1 siblings, 0 replies; 80+ messages in thread
From: Will Deacon @ 2014-09-10 17:07 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
> During CPU reset the CPU must be brought to the exception level it had on
> entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
> needed for this exception level switch.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/virt.h | 11 +++++++++++
>  arch/arm64/kernel/hyp-stub.S  | 10 ++++++++++
>  2 files changed, 21 insertions(+)

[...]

> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index 9ab5f70..a21cf51 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -75,7 +75,17 @@ el1_sync:
>  1:	cmp	x10, #HVC_SET_VECTORS
>  	b.ne	1f
>  	msr	vbar_el2, x0
> +	b	2f
>  
> +1:	cmp	x10, #HVC_CALL_FUNC
> +	b.ne    1f
> +	mov	x29, lr
> +	mov	lr, x0
> +	mov	x0, x1
> +	mov	x1, x2
> +	mov	x2, x3
> +	blr	lr
> +	mov	lr, x29

Why are you clobbering x29?

Will

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 02/13] arm64/kvm: Fix assembler compatibility of macros
  2014-09-10 16:35       ` Geoff Levand
@ 2014-09-10 17:09         ` Ard Biesheuvel
  -1 siblings, 0 replies; 80+ messages in thread
From: Ard Biesheuvel @ 2014-09-10 17:09 UTC (permalink / raw)
  To: linux-arm-kernel

On 10 September 2014 18:35, Geoff Levand <geoff@infradead.org> wrote:
> On Wed, 2014-09-10 at 10:40 +0200, Ard Biesheuvel wrote:
>> On 10 September 2014 00:49, Geoff Levand <geoff@infradead.org> wrote:
>> > Some of the macros defined in kvm_arm.h are useful in the exception vector
>> > routines, but they are not compatible with the assembler.  Change the
>> > definition of ESR_EL2_ISS to be compatible.
>> >
>> > Fixes build errors like these when using kvm_arm.h in assembly
>> > source files:
>> >
>> >   Error: unexpected characters following instruction at operand 3 -- `add x0,x1,#((1U<<25)-1)'
>> >
>> > Signed-off-by: Geoff Levand <geoff@infradead.org>
>> > ---
>> >  arch/arm64/include/asm/kvm_arm.h | 2 +-
>> >  1 file changed, 1 insertion(+), 1 deletion(-)
>> >
>> > diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
>> > index cc83520..e0e7e64 100644
>> > --- a/arch/arm64/include/asm/kvm_arm.h
>> > +++ b/arch/arm64/include/asm/kvm_arm.h
>> > @@ -176,7 +176,7 @@
>> >  #define ESR_EL2_EC_SHIFT       (26)
>> >  #define ESR_EL2_EC             (0x3fU << ESR_EL2_EC_SHIFT)
>> >  #define ESR_EL2_IL             (1U << 25)
>> > -#define ESR_EL2_ISS            (ESR_EL2_IL - 1)
>> > +#define ESR_EL2_ISS            (0xffff)
>>
>> Don't you mean 0x1ffffff?
>
> Hcalls have a 16 bit 'payload', the upper bits of the ISS field
> are specified as zero by the architecture so 0xffff is the same
> as 0x1ffffff.
>

Even if HVC is currently the only exception we are taking in EL2 (is
that the case btw?), it seems wrong to define this field in such a way
that it
(a) deviates from how the architecture specifies ESR_ELx.ISS and
(b) may cause surprises once someone unsuspectingly starts and'ing his
ESR values produced by another exception class with it, expecting the
macro's value to reflect its name
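
One way to honour both concerns, sketched here with names invented for
illustration (the series does not define these), is to keep the
architectural mask intact and add a second, narrower macro for the
hcall payload:

  /* Sketch only -- macro names invented for illustration. */
  #define ESR_EL2_ISS      (0x1ffffff)  /* full architectural ISS, bits [24:0] */
  #define ESR_EL2_HVC_IMM  (0xffff)     /* 16-bit immediate carried by HVC */

The el1_sync paths would then mask with ESR_EL2_HVC_IMM, while
ESR_EL2_ISS keeps meaning what its name says for every exception class.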

>> And, there is a macro UL() for this purpose, so I suppose you could
>> redefine ESR_EL2_IL as (UL(1) << 25) as well. I know it is not
>> strictly the same thing, but it should be good enough as this is arm64
>> only
>
> Sure that will be OK.  The one other use of ESR_EL2_IL will promote
> the operation to unsigned long without ill effect.  I'll prepare an
> updated patch.
>
> -Geoff
>
>

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-10 17:07     ` Will Deacon
@ 2014-09-10 17:23       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-10 17:23 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Will,

On Wed, 2014-09-10 at 18:07 +0100, Will Deacon wrote:
> On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
> > During CPU reset the CPU must be brought to the exception level it had on
> > entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
> > needed for this exception level switch.
> > 
> > Signed-off-by: Geoff Levand <geoff@infradead.org>
> > ---
> >  arch/arm64/include/asm/virt.h | 11 +++++++++++
> >  arch/arm64/kernel/hyp-stub.S  | 10 ++++++++++
> >  2 files changed, 21 insertions(+)
> 
> [...]
> 
> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > index 9ab5f70..a21cf51 100644
> > --- a/arch/arm64/kernel/hyp-stub.S
> > +++ b/arch/arm64/kernel/hyp-stub.S
> > @@ -75,7 +75,17 @@ el1_sync:
> >  1:	cmp	x10, #HVC_SET_VECTORS
> >  	b.ne	1f
> >  	msr	vbar_el2, x0
> > +	b	2f
> >  
> > +1:	cmp	x10, #HVC_CALL_FUNC
> > +	b.ne    1f
> > +	mov	x29, lr
> > +	mov	lr, x0
> > +	mov	x0, x1
> > +	mov	x1, x2
> > +	mov	x2, x3
> > +	blr	lr
> > +	mov	lr, x29
> 
> Why are you clobbering x29?

I can change this to x28, unless you can recommend another?

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-10 17:23       ` Geoff Levand
@ 2014-09-10 17:35         ` Will Deacon
  -1 siblings, 0 replies; 80+ messages in thread
From: Will Deacon @ 2014-09-10 17:35 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Sep 10, 2014 at 06:23:57PM +0100, Geoff Levand wrote:
> On Wed, 2014-09-10 at 18:07 +0100, Will Deacon wrote:
> > On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > > index 9ab5f70..a21cf51 100644
> > > --- a/arch/arm64/kernel/hyp-stub.S
> > > +++ b/arch/arm64/kernel/hyp-stub.S
> > > @@ -75,7 +75,17 @@ el1_sync:
> > >  1:	cmp	x10, #HVC_SET_VECTORS
> > >  	b.ne	1f
> > >  	msr	vbar_el2, x0
> > > +	b	2f
> > >  
> > > +1:	cmp	x10, #HVC_CALL_FUNC
> > > +	b.ne    1f
> > > +	mov	x29, lr
> > > +	mov	lr, x0
> > > +	mov	x0, x1
> > > +	mov	x1, x2
> > > +	mov	x2, x3
> > > +	blr	lr
> > > +	mov	lr, x29
> > 
> > Why are you clobbering x29?
> 
> I can change this to x28, unless you can recommend another?

How about something that's not callee saved?

Will
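
For readers following the register choice here, a quick AAPCS64
reference, written up as a small stand-alone table (compiled from the
procedure call standard, not taken from the series):

  #include <stdio.h>

  struct reg_class { const char *regs; const char *role; };

  /* AArch64 Procedure Call Standard (AAPCS64) register roles. */
  static const struct reg_class aapcs64[] = {
          { "x0-x7",   "arguments/results        (caller-saved)" },
          { "x8",      "indirect result location (caller-saved)" },
          { "x9-x15",  "scratch/temporaries      (caller-saved)" },
          { "x16-x17", "IP0/IP1 veneer scratch   (caller-saved)" },
          { "x18",     "platform register (reserved on some ABIs)" },
          { "x19-x28", "callee-saved" },
          { "x29",     "frame pointer            (callee-saved)" },
          { "x30",     "link register (lr)" },
  };

  int main(void)
  {
          unsigned i;

          for (i = 0; i < sizeof(aapcs64) / sizeof(aapcs64[0]); i++)
                  printf("%-8s %s\n", aapcs64[i].regs, aapcs64[i].role);
          return 0;
  }

The point of Will's question: x28 and x29 are callee-saved, so a
trampoline that silently clobbers one breaks its caller's assumptions,
whereas a caller-saved register is already fair game across any call.
The V2 of this patch, below, settles on x18 and documents that the
called function must preserve it.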

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH V2 02/13] arm64/kvm: Fix assembler compatibility of macros
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-10 18:04     ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-10 18:04 UTC (permalink / raw)
  To: linux-arm-kernel

Some of the macros defined in kvm_arm.h are useful in the exception vector
routines, but they are not compatible with the assembler.  Change the
definition of ESR_EL2_ISS to be compatible.

Fixes build errors like these when using kvm_arm.h in assembly
source files:

  Error: unexpected characters following instruction at operand 3 -- `and x0,x1,#((1U<<25)-1)'

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/kvm_arm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520..fb42ab5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -175,7 +175,7 @@
 /* Exception Syndrome Register (ESR) bits */
 #define ESR_EL2_EC_SHIFT	(26)
 #define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
-#define ESR_EL2_IL		(1U << 25)
+#define ESR_EL2_IL		(UL(1) << 25)
 #define ESR_EL2_ISS		(ESR_EL2_IL - 1)
 #define ESR_EL2_ISV_SHIFT	(24)
 #define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH V2 04/13] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-10 18:11     ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-10 18:11 UTC (permalink / raw)
  To: linux-arm-kernel

Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
During CPU reset the CPU must be brought to the exception level it had on
entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
needed for this exception level switch.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 13 +++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 10 ++++++++++
 2 files changed, 23 insertions(+)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 894fe53..cc4250c 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -41,6 +41,19 @@
 
 #define HVC_KVM_CALL_HYP 3
 
+/*
+ * HVC_CALL_FUNC - Execute a function at EL2.
+ *
+ * @x0: Physical address of the function to be executed.
+ * @x1: Passed as the first argument to the function.
+ * @x2: Passed as the second argument to the function.
+ * @x3: Passed as the third argument to the function.
+ *
+ * The called function must preserve the contents of register x18.
+ */
+
+#define HVC_CALL_FUNC 4
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 9ab5f70..27d786d 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -75,7 +75,17 @@ el1_sync:
 1:	cmp	x10, #HVC_SET_VECTORS
 	b.ne	1f
 	msr	vbar_el2, x0
+	b	2f
 
+1:	cmp	x10, #HVC_CALL_FUNC
+	b.ne    1f
+	mov	x18, lr
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+	mov	lr, x18
 1:
 2:	eret
 ENDPROC(el1_sync)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 80+ messages in thread
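
For the archive, a sketch of what a C-side caller of this hcall could
look like. hvc_call_func() is invented here for illustration; the
series itself issues the hvc from assembly, and this only works from
EL1 with the hyp-stub vectors installed:

  #include <linux/types.h>

  /* Hypothetical wrapper -- not part of the posted series. */
  static inline unsigned long hvc_call_func(phys_addr_t func,
                                            unsigned long a0,
                                            unsigned long a1,
                                            unsigned long a2)
  {
          /* el1_sync moves x0 into lr and shuffles x1-x3 down to x0-x2. */
          register unsigned long x0 asm("x0") = func;
          register unsigned long x1 asm("x1") = a0;
          register unsigned long x2 asm("x2") = a1;
          register unsigned long x3 asm("x3") = a2;

          /*
           * A real wrapper would need a much fuller clobber list, since
           * the function run at EL2 may clobber caller-saved registers.
           */
          asm volatile("hvc #4"                 /* HVC_CALL_FUNC */
                       : "+r" (x0)
                       : "r" (x1), "r" (x2), "r" (x3)
                       : "memory");

          return x0;
  }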

* [PATCH 03/13] arm64: Convert hcalls to use ISS field
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-11 16:14     ` Arun Chandran
  -1 siblings, 0 replies; 80+ messages in thread
From: Arun Chandran @ 2014-09-11 16:14 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,
On Wed, Sep 10, 2014 at 4:19 AM, Geoff Levand <geoff@infradead.org> wrote:
> To allow for additional hcalls to be defined and to make the arm64 hcall API
> more consistent across exception vector routines change the hcall implementations
> to use the ISS field of the ESR_EL2 register to specify the hcall type.
>
> The existing arm64 hcall implementations are limited in that they only allow
> for two distinct hcalls; with the x0 register either zero, or not zero.  Also,
> the API of the hyp-stub exception vector routines and the KVM exception vector
> routines differ; hyp-stub uses a non-zero value in x0 to implement
> __hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp.
>
> Define three new preprocessor macros HVC_GET_VECTORS, HVC_SET_VECTORS and
> HVC_KVM_CALL_HYP to be used as hcall type specifiers, and convert the
> existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
> to use these new macros when executing an HVC call.  Also change the
> corresponding hyp-stub and KVM el1_sync exception vector routines to use these
> new macros.
>

What about using a simpler approach like the one below?
I was able to use your kexec tree and successfully boot
both KVM and non-KVM combinations with this patch (patch attached).

########################
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df52..264d451 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -37,6 +37,8 @@ extern u32 __boot_cpu_mode[2];
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 phys_addr_t __hyp_get_vectors(void);

+void __hyp_kexec_final_call(unsigned long func_addr) __attribute__((noreturn));
+
 /* Reports the availability of HYP mode */
 static inline bool is_hyp_mode_available(void)
 {
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..456af5b 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -52,16 +52,30 @@ ENDPROC(__hyp_stub_vectors)
 
 	.align 11
 
+#define ESR_EL2_EC_SHIFT	26
+#define ESR_EL2_EC_HVC64	0x16
+#define ESR_EL2_ISS		0xffff
+#define KEXEC_FINAL_CALL	0xffff
+
 el1_sync:
-	mrs	x1, esr_el2
-	lsr	x1, x1, #26
-	cmp	x1, #0x16
-	b.ne	2f				// Not an HVC trap
-	cbz	x0, 1f
-	msr	vbar_el2, x0			// Set vbar_el2
-	b	2f
-1:	mrs	x0, vbar_el2			// Return vbar_el2
-2:	eret
+	mrs     x10, esr_el2
+	lsr     x9, x10, #ESR_EL2_EC_SHIFT      // x9=EC
+	and     x10, x10, #ESR_EL2_ISS          // x10=ISS
+
+	cmp     x9, #ESR_EL2_EC_HVC64
+	b.ne    4f                              // Not a host HVC trap
+
+	cbnz	x10, 3f				// kexec final call
+	cbz	x0, 2f
+
+1:	msr	vbar_el2, x0			// Set vbar_el2
+	eret
+
+2:	mrs	x0, vbar_el2			// Return vbar_el2
+	eret
+
+3:	br	x0
+4:	eret
 ENDPROC(el1_sync)
 
 .macro invalid_vector	label
@@ -99,12 +113,18 @@ ENDPROC(\label)
  * so you will need to set that to something sensible at the new hypervisor's
  * initialisation entry point.
  */
-
 ENTRY(__hyp_get_vectors)
 	mov	x0, xzr
-	// fall through
-ENTRY(__hyp_set_vectors)
 	hvc	#0
 	ret
 ENDPROC(__hyp_get_vectors)
+
+ENTRY(__hyp_set_vectors)
+	hvc	#0
+	ret
 ENDPROC(__hyp_set_vectors)
+
+/* x0 -> final kexec cleanup func addr */
+ENTRY(__hyp_kexec_final_call)
+	hvc	#KEXEC_FINAL_CALL
+ENDPROC(__hyp_kexec_final_call)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b72aa9f..b5803e3 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -1135,16 +1135,20 @@ ENDPROC(\label)
 	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
 	invalid_vector	el1_error_invalid, __kvm_hyp_panic
 
+#define ESR_EL2_ISS		0xffff
 el1_sync:					// Guest trapped into EL2
 	push	x0, x1
 	push	x2, x3
 
 	mrs	x1, esr_el2
+	and     x0, x1, #ESR_EL2_ISS
 	lsr	x2, x1, #ESR_EL2_EC_SHIFT
 
 	cmp	x2, #ESR_EL2_EC_HVC64
 	b.ne	el1_trap
 
+	cbnz	x0, 3f				// final kexec call
+
 	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
 	cbnz	x3, el1_trap			// called HVC
 
@@ -1159,6 +1163,7 @@ el1_sync:					// Guest trapped into EL2
 
 1:	push	lr, xzr
 
+
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
 	 */
@@ -1172,6 +1177,17 @@ el1_sync:					// Guest trapped into EL2
 	pop	lr, xzr
 2:	eret
 
+	/* Call the kexec clean up function */
+3:	pop	x2, x3
+	pop	x0, x1
+	/* Stage-2 translation */
+	msr	vttbr_el2, xzr
+	mrs	x1, sctlr_el2
+	bic	x1, x1, #1
+	msr	sctlr_el2, x1			// disable the MMU
+	isb
+	br	x0
+
 el1_trap:
 	/*
 	 * x1: ESR
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3cb6dec..b961482 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/virt.h>
 
 #include "proc-macros.S"
 
@@ -69,19 +70,33 @@ ENDPROC(cpu_cache_off)
  */
 	.align	5
 ENTRY(cpu_reset)
+	mov	x19, x0
+	adr	x0, hyp_final_cleanup
+	sub	w1, w1, #BOOT_CPU_MODE_EL2
+	cbz	w1, __hyp_kexec_final_call
 	ret	x0
 ENDPROC(cpu_reset)
 
+ENTRY(hyp_final_cleanup)
+	/* Need to do the final EL2 clean up here */
+	br	x19
+ENDPROC(hyp_final_cleanup)
+

##########################
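
Restated in C, the dispatch this el1_sync implements is roughly the
following. This is a readability model only, not code from the patch;
el1_sync_model() and the strings are invented for illustration:

  #include <stdio.h>

  #define ESR_EL2_EC_SHIFT  26
  #define ESR_EL2_EC_HVC64  0x16
  #define ESR_EL2_ISS       0xffff

  /* C model of the hyp-stub el1_sync dispatch above. */
  static const char *el1_sync_model(unsigned long esr, unsigned long x0)
  {
          unsigned long ec  = esr >> ESR_EL2_EC_SHIFT;
          unsigned long iss = esr & ESR_EL2_ISS;

          if (ec != ESR_EL2_EC_HVC64)
                  return "not a host HVC trap: eret";
          if (iss)
                  return "kexec final call: br x0";
          if (x0)
                  return "set vbar_el2 from x0";
          return "return vbar_el2 in x0";
  }

  int main(void)
  {
          unsigned long hvc0 = (unsigned long)ESR_EL2_EC_HVC64
                               << ESR_EL2_EC_SHIFT;       /* hvc #0 */
          unsigned long hvcffff = hvc0 | 0xffff;          /* hvc #0xffff */

          printf("%s\n", el1_sync_model(hvc0, 0));        /* get vectors */
          printf("%s\n", el1_sync_model(hvc0, 0x80000));  /* set vectors */
          printf("%s\n", el1_sync_model(hvcffff, 0x123)); /* final call */
          return 0;
  }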

--Arun



> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/virt.h | 20 ++++++++++++++++++++
>  arch/arm64/kernel/hyp-stub.S  | 38 ++++++++++++++++++++++++++------------
>  arch/arm64/kvm/hyp.S          | 19 ++++++++++++-------
>  3 files changed, 58 insertions(+), 19 deletions(-)
>
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 7a5df52..894fe53 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -21,6 +21,26 @@
>  #define BOOT_CPU_MODE_EL1      (0xe11)
>  #define BOOT_CPU_MODE_EL2      (0xe12)
>
> +/*
> + * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
> + */
> +
> +#define HVC_GET_VECTORS 1
> +
> +/*
> + * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
> + *
> + * @x0: Physical address of the new vector table.
> + */
> +
> +#define HVC_SET_VECTORS 2
> +
> +/*
> + * HVC_KVM_CALL_HYP - Execute kvm_call_hyp routine.
> + */
> +
> +#define HVC_KVM_CALL_HYP 3
> +
>  #ifndef __ASSEMBLY__
>
>  /*
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index 2d960a9..9ab5f70 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -54,16 +54,29 @@ ENDPROC(__hyp_stub_vectors)
>
>  #define ESR_EL2_EC_SHIFT       26
>  #define ESR_EL2_EC_HVC64       0x16
> +#define ESR_EL2_ISS            0xffff
>
>  el1_sync:
> -       mrs     x1, esr_el2
> -       lsr     x1, x1, #ESR_EL2_EC_SHIFT
> -       cmp     x1, #ESR_EL2_EC_HVC64
> -       b.ne    2f                              // Not an HVC trap
> -       cbz     x0, 1f
> -       msr     vbar_el2, x0                    // Set vbar_el2
> +       mrs     x10, esr_el2
> +       lsr     x9, x10, #ESR_EL2_EC_SHIFT      // x9=EC
> +       and     x10, x10, #ESR_EL2_ISS          // x10=ISS
> +
> +       cmp     x9, #ESR_EL2_EC_HVC64
> +       b.ne    2f                              // Not a host HVC trap
> +
> +       mrs     x9, vttbr_el2
> +       cbnz    x9, 2f                          // Not a host HVC trap
> +
> +       cmp     x10, #HVC_GET_VECTORS
> +       b.ne    1f
> +       mrs     x0, vbar_el2
>         b       2f
> -1:     mrs     x0, vbar_el2                    // Return vbar_el2
> +
> +1:     cmp     x10, #HVC_SET_VECTORS
> +       b.ne    1f
> +       msr     vbar_el2, x0
> +
> +1:
>  2:     eret
>  ENDPROC(el1_sync)
>
> @@ -103,11 +116,12 @@ ENDPROC(\label)
>   * initialisation entry point.
>   */
>
> -ENTRY(__hyp_get_vectors)
> -       mov     x0, xzr
> -       // fall through
>  ENTRY(__hyp_set_vectors)
> -       hvc     #0
> +       hvc     #HVC_SET_VECTORS
>         ret
> -ENDPROC(__hyp_get_vectors)
>  ENDPROC(__hyp_set_vectors)
> +
> +ENTRY(__hyp_get_vectors)
> +       hvc     #HVC_GET_VECTORS
> +       ret
> +ENDPROC(__hyp_get_vectors)
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index b72aa9f..3972ee9 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -26,6 +26,7 @@
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/virt.h>
>
>  #define CPU_GP_REG_OFFSET(x)   (CPU_GP_REGS + x)
>  #define CPU_XREG_OFFSET(x)     CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
> @@ -1105,12 +1106,9 @@ __hyp_panic_str:
>   * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
>   * passed in r0 and r1.
>   *
> - * A function pointer with a value of 0 has a special meaning, and is
> - * used to implement __hyp_get_vectors in the same way as in
> - * arch/arm64/kernel/hyp_stub.S.
>   */
>  ENTRY(kvm_call_hyp)
> -       hvc     #0
> +       hvc     #HVC_KVM_CALL_HYP
>         ret
>  ENDPROC(kvm_call_hyp)
>
> @@ -1140,6 +1138,7 @@ el1_sync:                                 // Guest trapped into EL2
>         push    x2, x3
>
>         mrs     x1, esr_el2
> +       and     x0, x1, #ESR_EL2_ISS
>         lsr     x2, x1, #ESR_EL2_EC_SHIFT
>
>         cmp     x2, #ESR_EL2_EC_HVC64
> @@ -1149,15 +1148,19 @@ el1_sync:                                       // Guest trapped into EL2
>         cbnz    x3, el1_trap                    // called HVC
>
>         /* Here, we're pretty sure the host called HVC. */
> +       mov     x10, x0
>         pop     x2, x3
>         pop     x0, x1
>
> -       /* Check for __hyp_get_vectors */
> -       cbnz    x0, 1f
> +       cmp     x10, #HVC_GET_VECTORS
> +       b.ne    1f
>         mrs     x0, vbar_el2
>         b       2f
>
> -1:     push    lr, xzr
> +1:     cmp     x10, #HVC_KVM_CALL_HYP
> +       b.ne    1f
> +
> +       push    lr, xzr
>
>         /*
>          * Compute the function address in EL2, and shuffle the parameters.
> @@ -1170,6 +1173,8 @@ el1_sync:                                 // Guest trapped into EL2
>         blr     lr
>
>         pop     lr, xzr
> +
> +1:
>  2:     eret
>
>  el1_trap:
> --
> 1.9.1
>
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: kexec_patch
Type: application/octet-stream
Size: 3766 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20140911/0d4ae7ef/attachment.obj>

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [PATCH 01/13] arm64: Add ESR_EL2_EC macros to hyp-stub
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 16:10     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 16:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> To improve the readability of the el1_sync routine in hyp-stub.S replace the
> numeric immediate values with preprocessor macros ESR_EL2_EC_SHIFT and
> ESR_EL2_EC_HVC64.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/kernel/hyp-stub.S | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index a272f33..2d960a9 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -52,10 +52,13 @@ ENDPROC(__hyp_stub_vectors)
>  
>  	.align 11
>  
> +#define ESR_EL2_EC_SHIFT	26
> +#define ESR_EL2_EC_HVC64	0x16

These exist in arch/arm64/include/asm/kvm_arm.h, no?

If anything that should be folded into arch/arm64/include/asm/esr.h...

Mark.
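
For reference, the consolidation suggested here would amount to
something like the following in esr.h. This is a sketch: the EC
encodings themselves are architectural, but the exact macro names and
file layout are illustrative:

  /* Sketch of folding the EC constants into arch/arm64/include/asm/esr.h. */
  #ifndef __ASM_ESR_H
  #define __ASM_ESR_H

  #define ESR_EL2_EC_SHIFT  (26)
  #define ESR_EL2_EC_HVC32  (0x12)  /* HVC from AArch32 */
  #define ESR_EL2_EC_SVC64  (0x15)  /* SVC from AArch64 */
  #define ESR_EL2_EC_HVC64  (0x16)  /* HVC from AArch64 */
  #define ESR_EL2_EC_SMC64  (0x17)  /* SMC from AArch64 */

  #endif /* __ASM_ESR_H */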

> +
>  el1_sync:
>  	mrs	x1, esr_el2
> -	lsr	x1, x1, #26
> -	cmp	x1, #0x16
> +	lsr	x1, x1, #ESR_EL2_EC_SHIFT
> +	cmp	x1, #ESR_EL2_EC_HVC64
>  	b.ne	2f				// Not an HVC trap
>  	cbz	x0, 1f
>  	msr	vbar_el2, x0			// Set vbar_el2
> -- 
> 1.9.1
> 
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 02/13] arm64/kvm: Fix assembler compatibility of macros
  2014-09-10 17:09         ` Ard Biesheuvel
@ 2014-09-15 16:14           ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 16:14 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Sep 10, 2014 at 06:09:07PM +0100, Ard Biesheuvel wrote:
> On 10 September 2014 18:35, Geoff Levand <geoff@infradead.org> wrote:
> > On Wed, 2014-09-10 at 10:40 +0200, Ard Biesheuvel wrote:
> >> On 10 September 2014 00:49, Geoff Levand <geoff@infradead.org> wrote:
> >> > Some of the macros defined in kvm_arm.h are useful in the exception vector
> >> > routines, but they are not compatible with the assembler.  Change the
> >> > definition of ESR_EL2_ISS to be compatible.
> >> >
> >> > Fixes build errors like these when using kvm_arm.h in assembly
> >> > source files:
> >> >
> >> >   Error: unexpected characters following instruction at operand 3 -- `add x0,x1,#((1U<<25)-1)'
> >> >
> >> > Signed-off-by: Geoff Levand <geoff@infradead.org>
> >> > ---
> >> >  arch/arm64/include/asm/kvm_arm.h | 2 +-
> >> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >> >
> >> > diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> >> > index cc83520..e0e7e64 100644
> >> > --- a/arch/arm64/include/asm/kvm_arm.h
> >> > +++ b/arch/arm64/include/asm/kvm_arm.h
> >> > @@ -176,7 +176,7 @@
> >> >  #define ESR_EL2_EC_SHIFT       (26)
> >> >  #define ESR_EL2_EC             (0x3fU << ESR_EL2_EC_SHIFT)
> >> >  #define ESR_EL2_IL             (1U << 25)
> >> > -#define ESR_EL2_ISS            (ESR_EL2_IL - 1)
> >> > +#define ESR_EL2_ISS            (0xffff)
> >>
> >> Don't you mean 0x1ffffff?
> >
> > Hcalls have a 16 bit 'payload', the upper bits of the ISS field
> > are specified as zero by the architecture so 0xffff is the same
> > as 0x1ffffff.
> >
> 
> Even if HVC is currently the only exception we are taking in EL2 (is
> that the case btw?), it seems wrong to define this field in such a way
> that it
> (a) deviates from how the architecture specifies ESR_ELx.ISS and
> (b) may cause surprises once someone unsuspectingly starts and'ing his
> ESR values produced by another exception class with it, expecting the
> macro's value to reflect its name

Agreed. A macro called ESR_EL2_ISS should return the ISS field, and
nothing less.

> >> And, there is a macro UL() for this purpose, so I suppose you could
> >> redefine ESR_EL2_IL as (UL(1) << 25) as well. I know it is not
> >> strictly the same thing, but it should be good enough as this is arm64
> >> only

This sounds good to me.
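For concreteness, a minimal sketch of the assembler-friendly
definitions being agreed on here (UL() is assumed to be the kernel's
usual helper that drops the suffix under __ASSEMBLY__):

	#define ESR_EL2_IL	(UL(1) << 25)
	#define ESR_EL2_ISS	(ESR_EL2_IL - 1)	/* full 25-bit ISS */

That keeps ESR_EL2_ISS covering the whole architectural field while
remaining usable as an assembler immediate.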

Mark.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 03/13] arm64: Convert hcalls to use ISS field
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 17:57     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 17:57 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

The general approach looks good to me, using the HVC immediate makes
this look far nicer to me. Hopefully Marc and Christoffer agree on that.

That said, I have some comments on the mechanics below.

On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> To allow for additional hcalls to be defined and to make the arm64 hcall API
> more consistent across exception vector routines change the hcall implementations
> to use the ISS field of the ESR_EL2 register to specify the hcall type.
> 
> The existing arm64 hcall implementations are limited in that they only allow
> for two distinct hcalls; with the x0 register either zero, or not zero.  Also,
> the API of the hyp-stub exception vector routines and the KVM exception vector
> routines differ; hyp-stub uses a non-zero value in x0 to implement
> __hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp.
> 
> Define three new preprocessor macros HVC_GET_VECTORS, HVC_SET_VECTORS and
> HVC_KVM_CALL_HYP and to be used as hcall type specifiers and convert the
> existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
> to use these new macros when executing and HVC call.  Also change the
> corresponding hyp-stub and KVM el1_sync exception vector routines to use these
> new macros.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/virt.h | 20 ++++++++++++++++++++
>  arch/arm64/kernel/hyp-stub.S  | 38 ++++++++++++++++++++++++++------------
>  arch/arm64/kvm/hyp.S          | 19 ++++++++++++-------
>  3 files changed, 58 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 7a5df52..894fe53 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -21,6 +21,26 @@
>  #define BOOT_CPU_MODE_EL1	(0xe11)
>  #define BOOT_CPU_MODE_EL2	(0xe12)
>  
> +/*
> + * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
> + */
> +
> +#define HVC_GET_VECTORS 1
> +
> +/*
> + * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
> + *
> + * @x0: Physical address of the new vector table.
> + */
> +
> +#define HVC_SET_VECTORS 2
> +
> +/*
> + * HVC_KVM_CALL_HYP - Execute kvm_call_hyp routine.
> + */
> +
> +#define HVC_KVM_CALL_HYP 3

If this can be used without KVM (e.g. in the hyp stub) I'd just call
this HVC_CALL_HYP, or the name will be a little misleading.

> +
>  #ifndef __ASSEMBLY__
>  
>  /*
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index 2d960a9..9ab5f70 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -54,16 +54,29 @@ ENDPROC(__hyp_stub_vectors)
>  
>  #define ESR_EL2_EC_SHIFT	26
>  #define ESR_EL2_EC_HVC64	0x16
> +#define ESR_EL2_ISS		0xffff

The last patch tried to add an identical macro to a header file. Can we
use that header please?

>  
>  el1_sync:
> -	mrs	x1, esr_el2
> -	lsr	x1, x1, #ESR_EL2_EC_SHIFT
> -	cmp	x1, #ESR_EL2_EC_HVC64
> -	b.ne	2f				// Not an HVC trap
> -	cbz	x0, 1f
> -	msr	vbar_el2, x0			// Set vbar_el2
> +	mrs	x10, esr_el2

Any reason for using x10?

If we want to preserve the lowest register numbers, start with the
highest caller-saved register numbers (i.e. x18). At least for me it
makes the code far easier to read; it doesn't make it look like x10 is
special.

> +	lsr	x9, x10, #ESR_EL2_EC_SHIFT	// x9=EC
> +	and	x10, x10, #ESR_EL2_ISS		// x10=ISS

The mnemonics make these comments redundant.

> +	cmp     x9, #ESR_EL2_EC_HVC64
> +	b.ne    2f                              // Not a host HVC trap

Now that we have the nice mnemonic, we could get rid of the comment
here. I'd drop the 'host' from the comment; it wasn't there originally
and it's somewhat meaningless for the stub (KVM isn't up yet, and
only the native OS can make an HVC).

> +	mrs     x9, vttbr_el2
> +	cbnz    x9, 2f                          // Not a host HVC trap

I don't understand this. When is vttbr_el2 non-zero, and why do we want
to silently return from a HVC in that case? That didn't seem to be the
case in the original code.

> +
> +	cmp	x10, #HVC_GET_VECTORS
> +	b.ne	1f
> +	mrs	x0, vbar_el2
>  	b	2f
> -1:	mrs	x0, vbar_el2			// Return vbar_el2
> +
> +1:	cmp	x10, #HVC_SET_VECTORS
> +	b.ne	1f
> +	msr	vbar_el2, x0
> +
> +1:

It feels like we should explode if we ever reach here from the host --
if we've made an unsupported HVC we really want to know that we've done
so.
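One hedged sketch of what that could look like, with a hypothetical
bad-hcall path (label numbers and the __hyp_bad_hcall symbol are
illustrative only):

	1:	cmp	x10, #HVC_SET_VECTORS
		b.ne	99f
		msr	vbar_el2, x0
		b	2f

	99:	b	__hyp_bad_hcall		// unknown hcall: fail loudly
	2:	eret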

>  2:	eret
>  ENDPROC(el1_sync)
>  
> @@ -103,11 +116,12 @@ ENDPROC(\label)
>   * initialisation entry point.
>   */
>  
> -ENTRY(__hyp_get_vectors)
> -	mov	x0, xzr
> -	// fall through
>  ENTRY(__hyp_set_vectors)
> -	hvc	#0
> +	hvc	#HVC_SET_VECTORS
>  	ret
> -ENDPROC(__hyp_get_vectors)
>  ENDPROC(__hyp_set_vectors)
> +
> +ENTRY(__hyp_get_vectors)
> +	hvc	#HVC_GET_VECTORS
> +	ret
> +ENDPROC(__hyp_get_vectors)
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index b72aa9f..3972ee9 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -26,6 +26,7 @@
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/virt.h>
>  
>  #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
>  #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
> @@ -1105,12 +1106,9 @@ __hyp_panic_str:
>   * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
>   * passed in r0 and r1.
>   *
> - * A function pointer with a value of 0 has a special meaning, and is
> - * used to implement __hyp_get_vectors in the same way as in
> - * arch/arm64/kernel/hyp_stub.S.
>   */
>  ENTRY(kvm_call_hyp)
> -	hvc	#0
> +	hvc	#HVC_KVM_CALL_HYP
>  	ret
>  ENDPROC(kvm_call_hyp)
>  
> @@ -1140,6 +1138,7 @@ el1_sync:					// Guest trapped into EL2
>  	push	x2, x3
>  
>  	mrs	x1, esr_el2
> +	and	x0, x1, #ESR_EL2_ISS
>  	lsr	x2, x1, #ESR_EL2_EC_SHIFT
>  
>  	cmp	x2, #ESR_EL2_EC_HVC64
> @@ -1149,15 +1148,19 @@ el1_sync:					// Guest trapped into EL2
>  	cbnz	x3, el1_trap			// called HVC
>  
>  	/* Here, we're pretty sure the host called HVC. */
> +	mov	x10, x0

As above, please use the highest numbered caller-saved register so as to
not make the register numbering look special.

>  	pop	x2, x3
>  	pop	x0, x1
>  
> -	/* Check for __hyp_get_vectors */
> -	cbnz	x0, 1f
> +	cmp	x10, #HVC_GET_VECTORS
> +	b.ne	1f
>  	mrs	x0, vbar_el2
>  	b	2f
>  
> -1:	push	lr, xzr
> +1:	cmp	x10, #HVC_KVM_CALL_HYP
> +	b.ne	1f
> +
> +	push	lr, xzr
>  
>  	/*
>  	 * Compute the function address in EL2, and shuffle the parameters.
> @@ -1170,6 +1173,8 @@ el1_sync:					// Guest trapped into EL2
>  	blr	lr
>  
>  	pop	lr, xzr
> +
> +1:
>  2:	eret

Any reason we need two labels here?

If we've got here with an invalid HVC immediate, I think we should
explode loudly.

Mark.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 18:11     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 18:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
> During CPU reset the CPU must be brought to the exception level it had on
> entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
> needed for this exception level switch.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/virt.h | 11 +++++++++++
>  arch/arm64/kernel/hyp-stub.S  | 10 ++++++++++
>  2 files changed, 21 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 894fe53..b217fbc 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -41,6 +41,17 @@
>  
>  #define HVC_KVM_CALL_HYP 3
>  
> +/*
> + * HVC_CALL_FUNC - Execute a function at EL2.
> + *
> + * @x0: Physical address of the funtion to be executed.
> + * @x1: Passed as the first argument to @fn.
> + * @x2: Passed as the second argument to @fn.
> + * @x3: Passed as the third argument to @fn.
> + */
> +
> +#define HVC_CALL_FUNC 4
> +

Can't we use the HVC_KVM_CALL_HYP for this as well? I thought we already
added the code to the stub to do that in the last patch.

Is there a difference between the two that I'm missing?

>  #ifndef __ASSEMBLY__
>  
>  /*
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index 9ab5f70..a21cf51 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -75,7 +75,17 @@ el1_sync:
>  1:	cmp	x10, #HVC_SET_VECTORS
>  	b.ne	1f
>  	msr	vbar_el2, x0
> +	b	2f
>  
> +1:	cmp	x10, #HVC_CALL_FUNC
> +	b.ne    1f
> +	mov	x29, lr

What's the contract for functions we call through the stub?

If they can use all the caller-saved registers, then we need to stash
the original LR before issuing the HVC. Otherwise we can stash it in
x18 at EL2.
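As a sketch of the latter, assuming the HVC_CALL_FUNC convention from
this patch (function address in x0, arguments in x1-x3; untested):

	1:	cmp	x10, #HVC_CALL_FUNC
		b.ne	1f
		mov	x18, lr			// stash lr in x18 at EL2
		mov	lr, x0			// function to call
		mov	x0, x1			// shuffle the arguments down
		mov	x1, x2
		mov	x2, x3
		blr	lr
		mov	lr, x18
	1: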

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 06/13] arm64: Add new routine read_cpu_properties
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 18:42     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 18:42 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> The kexec re-boot support that will be added in a subsequent patch in this
> series will need to read the device tree CPU properties, and it is expected
> that a rework of the SMP spin table code to handle cpu_die will also need this
> functionality, so add two new common arm64 files cpu-properties.h and
> cpu-properties.c that define a new structure cpu_properties that hold the
> various CPU properties from a device tree, and the new routine
> read_cpu_properties() that fills the structure from a device tree CPU node.

I'm very much not keen on placing all this information in a single
structure -- that adds a new tight coupling that we didn't have before,
and it looks like it's going to be painful to maintain.

If kexec uses the existing high-level hotplug infrastructure, it has no
reason to go anywhere near this information.

So I really don't like this patch.

Why do you think we need this information to be centralized in this way?

Thanks,
Mark.

> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/kernel/cpu-properties.c | 58 ++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/cpu-properties.h | 39 +++++++++++++++++++++++++
>  2 files changed, 97 insertions(+)
>  create mode 100644 arch/arm64/kernel/cpu-properties.c
>  create mode 100644 arch/arm64/kernel/cpu-properties.h
> 
> diff --git a/arch/arm64/kernel/cpu-properties.c b/arch/arm64/kernel/cpu-properties.c
> new file mode 100644
> index 0000000..e64b34b
> --- /dev/null
> +++ b/arch/arm64/kernel/cpu-properties.c
> @@ -0,0 +1,58 @@
> +/*
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include "cpu-properties.h"
> +
> +int read_cpu_properties(struct cpu_properties *p, const struct device_node *dn)
> +{
> +	const u32 *cell;
> +
> +	memset(p, 0, sizeof(*p));
> +	p->hwid = INVALID_HWID;
> +	p->cpu_release_addr = INVALID_ADDR;
> +
> +	cell = of_get_property(dn, "reg", NULL);
> +
> +	if (!cell) {
> +		pr_err("%s: Error: %s: invalid reg property\n",
> +		       __func__, dn->full_name);
> +		return -1;
> +	}
> +
> +	p->hwid = of_read_number(cell,
> +		of_n_addr_cells((struct device_node *)dn)) & MPIDR_HWID_BITMASK;
> +
> +	p->enable_method = of_get_property(dn, "enable-method", NULL);
> +
> +	if (!p->enable_method) {
> +		pr_err("%s: Error: %s: invalid enable-method\n",
> +		       __func__, dn->full_name);
> +		return -1;
> +	}
> +
> +	if (!strcmp(p->enable_method, "psci")) {
> +		p->type = cpu_enable_method_psci;
> +		return 0;
> +	}
> +
> +	if (strcmp(p->enable_method, "spin-table")) {
> +		p->type = cpu_enable_method_unknown;
> +		return -1;
> +	}
> +
> +	p->type = cpu_enable_method_spin_table;
> +
> +	if (of_property_read_u64(dn, "cpu-release-addr",
> +				 &p->cpu_release_addr)) {
> +		pr_err("%s: Error: %s: invalid cpu-return-addr property\n",
> +		       __func__, dn->full_name);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> diff --git a/arch/arm64/kernel/cpu-properties.h b/arch/arm64/kernel/cpu-properties.h
> new file mode 100644
> index 0000000..b4218ef
> --- /dev/null
> +++ b/arch/arm64/kernel/cpu-properties.h
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#if !defined(__ARM64_CPU_PROPERTIES_H)
> +#define __ARM64_CPU_PROPERTIES_H
> +
> +#include <asm/memory.h>
> +#include <asm/cputype.h>
> +
> +#define INVALID_ADDR UL(~0)
> +
> +#if !defined(__ASSEMBLY__)
> +
> +#include <linux/kernel.h>
> +#include <linux/of.h>
> +
> +enum cpu_enable_method {
> +	cpu_enable_method_unknown,
> +	cpu_enable_method_psci,
> +	cpu_enable_method_spin_table,
> +};
> +
> +struct cpu_properties {
> +	u64 hwid;
> +	u64 cpu_release_addr;
> +	const char *enable_method;
> +	enum cpu_enable_method type;
> +};
> +
> +int read_cpu_properties(struct cpu_properties *p, const struct device_node *dn);
> +
> +#endif /* !defined(__ASSEMBLY__) */
> +
> +#endif
> -- 
> 1.9.1
> 
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 07/13] arm64: Add new routine local_disable
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 18:56     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 18:56 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> Add the new arm64 routine local_disable() to allow the masking of several DAIF
> flags in one operation.  Currently, we only have routines to mask individual
> flags, and to mask several flags multiple calls to daifset are required.
>
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/irqflags.h | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> index 11cc941..28521d4 100644
> --- a/arch/arm64/include/asm/irqflags.h
> +++ b/arch/arm64/include/asm/irqflags.h
> @@ -113,5 +113,18 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
>  #define local_dbg_enable()	asm("msr	daifclr, #8" : : : "memory")
>  #define local_dbg_disable()	asm("msr	daifset, #8" : : : "memory")
>  
> +enum daif_flag {

Is there any reason for this to be an enum rather than a set of
#defines?

It would be nice to be able to use these in asm.

> +	DAIF_FIQ   = (1UL << 6),
> +	DAIF_IRQ   = (1UL << 7),
> +	DAIF_ASYNC = (1UL << 8),
> +	DAIF_DBG   = (1UL << 9),
> +	DAIF_ALL   = (0xffUL << 6),

Not 0xf?

It would be nicer to OR the other flags.
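i.e. something like the following sketch, keeping the values from the
patch (UL() assumed to expand to a plain constant under __ASSEMBLY__):

	#define DAIF_FIQ	(UL(1) << 6)
	#define DAIF_IRQ	(UL(1) << 7)
	#define DAIF_ASYNC	(UL(1) << 8)
	#define DAIF_DBG	(UL(1) << 9)
	#define DAIF_ALL	(DAIF_FIQ | DAIF_IRQ | DAIF_ASYNC | DAIF_DBG)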

> +};
> +
> +static inline void local_disable(unsigned long daif_flags)
> +{
> +	arch_local_irq_restore(daif_flags | arch_local_save_flags());

If we knew the value was a constant (which we could check with
__builtin_constant_p) we could use daifset here, rather than having a
RMW sequence.

With that, the other local_*_{enable,disable} calls could be rewritten
in terms of this, without affecting the generated code. That would
require some shifting to account for the difference between pstate and
daif{clr,set} layout, but for constants that shouldn't be a problem.
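A rough, untested sketch of that idea (the shift by 6 converts the
PSTATE bit positions above into the daifset immediate layout):

	static inline void local_disable(unsigned long daif_flags)
	{
		if (__builtin_constant_p(daif_flags))
			asm volatile("msr daifset, #%0"
				     : : "i" (daif_flags >> 6) : "memory");
		else
			arch_local_irq_restore(daif_flags |
					       arch_local_save_flags());
	}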

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 08/13] arm64: Use cpu_ops for smp_stop
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 19:06     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 19:06 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:
> The current implementation of ipi_cpu_stop() is just a tight infinite loop
> around cpu_relax().  This infinite loop implementation is OK if the machine
> will soon do a poweroff, but it doesn't have any mechanism to allow a CPU
> to be brought back on-line, nor is it compatible with kexec re-boot.

I don't see why we should use this when we have disable_nonboot_cpus.

If the kernel is alive and well, disable_nonboot_cpus will correctly
shut down all but one CPU, returning an error if that fails, whereupon
we can respect the error code and halt the kexec.

If the kernel is not alive and well, we have no idea what CPUs are
executing anyway, so all we can expect to do is to boot a (UP) crash
kernel in some previously reserved memory. Trying to actually kill the
CPUs is nice, but possibly not necessary.

> Add a check for a valid cpu_die method of the appropriate cpu_ops structure,
> and if a valid method is found, transfer control to that method.  It is
> expected that the cpu_die method puts the CPU into a state such that they can
> be brought back on-line or progress through a kexec re-boot.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/kernel/smp.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 4743397..002aa8a 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -555,6 +555,15 @@ static void ipi_cpu_stop(unsigned int cpu)
>  
>  	local_irq_disable();
>  
> +	/* If we have the cpu ops use them. */
> +
> +	if (cpu_ops[cpu]->cpu_disable &&
> +	    cpu_ops[cpu]->cpu_die &&
> +	    !cpu_ops[cpu]->cpu_disable(cpu))
> +		cpu_ops[cpu]->cpu_die(cpu);

I don't think kexec should handle this. The hotplug code already does
this, better (calling cpu_kill and returning an error code), and having
two callers of these functions is only going to lead to hard-to-debug
drift between the two.

>  	while (1)
>  		cpu_relax();

Any CPUs left here are a major problem.

We absolutely must fail kexec if a CPU is still in the kernel (in the
pen or in the kernel proper), or they can do arbitrarily bad things when
the kernel image gets clobbered. So this is insufficient.

As I mention above, a crash kernel might be an exception to that rule,
but we shouldn't treat that as the usual case.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 09/13] arm64/kexec: Kexec expects cpu_die
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 19:10     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 19:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:
> The arm64 implementation of kexec expects an operational cpu_die method of
> struct cpu_operations, so add defined(CONFIG_KEXEC) to the preprocessor
> conditional that enables cpu_die.

I very much do not like this, as I explained in previous postings and my
comments on other patches in this series.

Please use the hotplug infrastructure, and have kexec depend on
HOTPLUG_CPU || !SMP.

There is no reason for these files to have any knowledge of kexec
whatsoever.
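In Kconfig terms, a sketch of that dependency:

	config KEXEC
		bool "kexec system call"
		depends on HOTPLUG_CPU || !SMP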

Thanks,
Mark

> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/cpu_ops.h | 2 +-
>  arch/arm64/kernel/psci.c         | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
> index d7b4b38..9fd7281 100644
> --- a/arch/arm64/include/asm/cpu_ops.h
> +++ b/arch/arm64/include/asm/cpu_ops.h
> @@ -50,7 +50,7 @@ struct cpu_operations {
>  	int		(*cpu_prepare)(unsigned int);
>  	int		(*cpu_boot)(unsigned int);
>  	void		(*cpu_postboot)(void);
> -#ifdef CONFIG_HOTPLUG_CPU
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
>  	int		(*cpu_disable)(unsigned int cpu);
>  	void		(*cpu_die)(unsigned int cpu);
>  	int		(*cpu_kill)(unsigned int cpu);
> diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
> index 5539547..81dbbc9 100644
> --- a/arch/arm64/kernel/psci.c
> +++ b/arch/arm64/kernel/psci.c
> @@ -380,7 +380,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
>  	return err;
>  }
>  
> -#ifdef CONFIG_HOTPLUG_CPU
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
>  static int cpu_psci_cpu_disable(unsigned int cpu)
>  {
>  	/* Fail early if we don't have CPU_OFF support */
> @@ -442,7 +442,7 @@ const struct cpu_operations cpu_psci_ops = {
>  	.cpu_init	= cpu_psci_cpu_init,
>  	.cpu_prepare	= cpu_psci_cpu_prepare,
>  	.cpu_boot	= cpu_psci_cpu_boot,
> -#ifdef CONFIG_HOTPLUG_CPU
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
>  	.cpu_disable	= cpu_psci_cpu_disable,
>  	.cpu_die	= cpu_psci_cpu_die,
>  	.cpu_kill	= cpu_psci_cpu_kill,
> -- 
> 1.9.1
> 
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 10/13] arm64/kexec: Revert change to machine_shutdown()
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-15 19:20     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-15 19:20 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:
> Commit 90f51a09ef83 ("arm64: Fix machine_shutdown() definition") changed the
> implementation of machine_shutdown() from calling smp_send_stop() to
> calling disable_nonboot_cpus().  Revert that part of the commit so that
> machine_shutdown() once again calls smp_send_stop().

I don't think this is the right way to go. I think we should be using
the hotplug infrastructure (i.e. disable_nonboot_cpus()) rather than
rolling our own, and we should be paying attention to the return value.

So as far as I can tell, disable_nonboot_cpus is the right thing to
call, but machine_shutdown is the wrong place to call it (due to not
returning an error code). We need to call it when we can still abort the
kexec.
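As a sketch, the pattern would be something like the following, called
from a point in the kexec path that is still allowed to fail (the
helper name is illustrative):

	static int kexec_offline_secondaries(void)
	{
		int ret = disable_nonboot_cpus();

		if (ret)
			pr_err("kexec: failed to offline secondary CPUs (%d)\n",
			       ret);

		return ret;	/* non-zero: caller must abort the kexec */
	}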

Mark.
 
> With the application of another patch in this series, (arm64: Use cpu_ops for
> smp_stop), smp_send_stop() will do the correct thing for a kexec reboot. This
> change also corrects the source code comment for the machine_shutdown() routine.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/kernel/process.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 0a3414b..cd0ae9d 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -101,15 +101,12 @@ void arch_cpu_idle_dead(void)
>  /*
>   * Called by kexec, immediately prior to machine_kexec().
>   *
> - * This must completely disable all secondary CPUs; simply causing those CPUs
> - * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
> - * kexec'd kernel to use any and all RAM as it sees fit, without having to
> - * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
> - * functionality embodied in disable_nonboot_cpus() to achieve this.
> + * This must shutdown all secondary CPUs.  The functionality
> + * embodied in smp_send_stop() will achieve this.
>   */
>  void machine_shutdown(void)
>  {
> -	disable_nonboot_cpus();
> +	smp_send_stop();
>  }
>  
>  /*
> -- 
> 1.9.1
> 
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 11/13] arm64/kexec: Add core kexec support
  2014-09-09 22:49   ` Geoff Levand
@ 2014-09-18  1:13     ` Mark Rutland
  -1 siblings, 0 replies; 80+ messages in thread
From: Mark Rutland @ 2014-09-18  1:13 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
> arm64 architecture that add support for the kexec re-boot mechanism
> (CONFIG_KEXEC) on arm64 platforms.
>
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/Kconfig                  |   8 +
>  arch/arm64/include/asm/kexec.h      |  52 +++
>  arch/arm64/kernel/Makefile          |   2 +
>  arch/arm64/kernel/machine_kexec.c   | 612 ++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/relocate_kernel.S | 185 +++++++++++
>  include/uapi/linux/kexec.h          |   1 +
>  6 files changed, 860 insertions(+)
>  create mode 100644 arch/arm64/include/asm/kexec.h
>  create mode 100644 arch/arm64/kernel/machine_kexec.c
>  create mode 100644 arch/arm64/kernel/relocate_kernel.S
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index f0d3a2d..6f0e1f1 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -313,6 +313,14 @@ config ARCH_HAS_CACHE_LINE_SIZE
>
>  source "mm/Kconfig"
>
> +config KEXEC
> +       bool "kexec system call"
> +       ---help---
> +         kexec is a system call that implements the ability to shutdown your
> +         current kernel, and to start another kernel.  It is like a reboot
> +         but it is independent of the system firmware.   And like a reboot
> +         you can start any kernel with it, not just Linux.
> +
>  config XEN_DOM0
>         def_bool y
>         depends on XEN
> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> new file mode 100644
> index 0000000..9a3932c
> --- /dev/null
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -0,0 +1,52 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#if !defined(_ARM64_KEXEC_H)
> +#define _ARM64_KEXEC_H
> +
> +/* Maximum physical address we can use pages from */
> +
> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> +
> +/* Maximum address we can reach in physical address mode */
> +
> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> +
> +/* Maximum address we can use for the control code buffer */
> +
> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> +

What are these used for? I see that other architectures seem to do the
same thing, but they look odd.

> +#define KEXEC_CONTROL_PAGE_SIZE        4096

What's this used for?

Does this work with 64k pages, and is there any reason we can't figure
out the actual size of the code (so we don't get bitten if it grows)?
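
e.g. something like this (a sketch, using the relocate_new_kernel /
relocate_new_kernel_end markers the patch already defines in
relocate_kernel.S):

	extern char relocate_new_kernel[];
	extern char relocate_new_kernel_end[];

	/* Actual size of the code to be copied into the control page. */
	static unsigned long relocate_code_size(void)
	{
		return (unsigned long)(relocate_new_kernel_end -
				       relocate_new_kernel);
	}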

> +
> +#define KEXEC_ARCH KEXEC_ARCH_ARM64
> +
> +#define ARCH_HAS_KIMAGE_ARCH
> +
> +#if !defined(__ASSEMBLY__)
> +
> +struct kimage_arch {
> +       void *ctx;
> +};
> +
> +/**
> + * crash_setup_regs() - save registers for the panic kernel
> + *
> + * @newregs: registers are saved here
> + * @oldregs: registers to be saved (may be %NULL)
> + */
> +
> +static inline void crash_setup_regs(struct pt_regs *newregs,
> +                                   struct pt_regs *oldregs)
> +{
> +}

It would be nice to know what we're going to do for this.

Is this a required function, or can we get away without crash kernel
support for the moment?

> +
> +#endif /* !defined(__ASSEMBLY__) */
> +
> +#endif
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index df7ef87..8b7c029 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -29,6 +29,8 @@ arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
>  arm64-obj-$(CONFIG_JUMP_LABEL)         += jump_label.o
>  arm64-obj-$(CONFIG_KGDB)               += kgdb.o
>  arm64-obj-$(CONFIG_EFI)                        += efi.o efi-stub.o efi-entry.o
> +arm64-obj-$(CONFIG_KEXEC)              += machine_kexec.o relocate_kernel.o    \
> +                                          cpu-properties.o
>
>  obj-y                                  += $(arm64-obj-y) vdso/
>  obj-m                                  += $(arm64-obj-m)
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> new file mode 100644
> index 0000000..043a3bc
> --- /dev/null
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -0,0 +1,612 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/kexec.h>
> +#include <linux/of_fdt.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/cpu_ops.h>
> +#include <asm/system_misc.h>
> +
> +#include "cpu-properties.h"
> +
> +#if defined(DEBUG)
> +static const int debug = 1;
> +#else
> +static const int debug;
> +#endif

I don't think we need this.

> +
> +typedef struct dtb_buffer {char b[0]; } dtb_t;

It would be nice for this to be consistent with other dtb uses; if we
need a dtb type then it shouldn't be specific to kexec.

[...]

> +static struct kexec_ctx *current_ctx;
> +
> +static int kexec_ctx_alloc(struct kimage *image)
> +{
> +       BUG_ON(image->arch.ctx);
> +
> +       image->arch.ctx = kmalloc(sizeof(struct kexec_ctx), GFP_KERNEL);
> +
> +       if (!image->arch.ctx)
> +               return -ENOMEM;
> +
> +       current_ctx = (struct kexec_ctx *)image->arch.ctx;

This seems to be the only use of current_ctx. I take it this is a
leftover from debugging?

[...]

> +/**
> + * kexec_list_walk - Helper to walk the kimage page list.
> + */

Please keep this associated with the function it refers to (nothing
should be between this comment and the function prototype).

> +
> +#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)

Can't this live in include/linux/kexec.h, where these flags are defined?

The meaning of these doesn't seem to be documented anywhere. Would you
be able to explain what each of these means?

> +static void kexec_list_walk(void *ctx, unsigned long kimage_head,
> +       void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
> +{
> +       void *dest;
> +       unsigned long *entry;
> +
> +       for (entry = &kimage_head, dest = NULL; ; entry++) {
> +               unsigned int flag = *entry & IND_FLAGS;
> +               void *addr = phys_to_virt(*entry & PAGE_MASK);
> +
> +               switch (flag) {
> +               case IND_INDIRECTION:
> +                       entry = (unsigned long *)addr - 1;
> +                       cb(ctx, flag, addr, NULL);
> +                       break;
> +               case IND_DESTINATION:
> +                       dest = addr;
> +                       cb(ctx, flag, addr, NULL);
> +                       break;
> +               case IND_SOURCE:
> +                       cb(ctx, flag, addr, dest);
> +                       dest += PAGE_SIZE;

I really don't understand what's going on with dest here, but that's
probably because I don't understand the meaning of the flags.

> +                       break;
> +               case IND_DONE:
> +                       cb(ctx, flag , NULL, NULL);
> +                       return;
> +               default:
> +                       pr_devel("%s:%d unknown flag %xh\n", __func__, __LINE__,
> +                               flag);

Wouldn't pr_warn be more appropriate here?

> +                       cb(ctx, flag, addr, NULL);
> +                       break;
> +               }
> +       }
> +}
> +
> +/**
> + * kexec_image_info - For debugging output.
> + */
> +
> +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
> +static void _kexec_image_info(const char *func, int line,
> +       const struct kimage *image)
> +{
> +       if (debug) {
> +               unsigned long i;
> +
> +               pr_devel("%s:%d:\n", func, line);
> +               pr_devel("  kexec image info:\n");
> +               pr_devel("    type:        %d\n", image->type);
> +               pr_devel("    start:       %lx\n", image->start);
> +               pr_devel("    head:        %lx\n", image->head);
> +               pr_devel("    nr_segments: %lu\n", image->nr_segments);
> +
> +               for (i = 0; i < image->nr_segments; i++) {
> +                       pr_devel("      segment[%lu]: %016lx - %016lx, "
> +                               "%lxh bytes, %lu pages\n",
> +                               i,
> +                               image->segment[i].mem,
> +                               image->segment[i].mem + image->segment[i].memsz,
> +                               image->segment[i].memsz,
> +                               image->segment[i].memsz /  PAGE_SIZE);
> +
> +                       if (kexec_is_dtb_user(image->segment[i].buf))
> +                               pr_devel("        dtb segment\n");
> +               }
> +       }
> +}

pr_devel is already dependent on DEBUG, so surely we don't need to check
the debug variable?

> +
> +/**
> + * kexec_find_dtb_seg - Helper routine to find the dtb segment.
> + */
> +
> +static const struct kexec_segment *kexec_find_dtb_seg(
> +       const struct kimage *image)
> +{
> +       int i;
> +
> +       for (i = 0; i < image->nr_segments; i++) {
> +               if (kexec_is_dtb_user(image->segment[i].buf))
> +                       return &image->segment[i];
> +       }
> +
> +       return NULL;
> +}

I'm really not keen on having the kernel guess which blobs need special
treatment, though we seem to do that for arm.

It would be far nicer if we could pass flags for each segment to
describe what it is (e.g. kernel image, dtb, other binary blob), so we
can do things like pass multiple DTBs (so we load two kernels at once
and pass each a unique DTB if we want to boot a new kernel + crashkernel
pair). Unfortunately that would require some fairly invasive rework of
the kexec core.
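
To illustrate the sort of per-segment tagging I mean (purely
hypothetical; this is not the current kexec UAPI):

	/* Hypothetical tag describing what each segment contains. */
	enum kexec_segment_type {
		KEXEC_SEG_OTHER = 0,
		KEXEC_SEG_KERNEL,
		KEXEC_SEG_DTB,
	};

	struct kexec_segment_tagged {
		const void *buf;	/* userspace buffer */
		size_t bufsz;
		unsigned long mem;	/* physical load address */
		size_t memsz;
		unsigned int type;	/* enum kexec_segment_type */
	};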

For secureboot we can't trust a dtb from userspace, and will have to use
kexec_file_load. To work with that we can either:

* Reuse the original DTB, patched with the new command line. This may
  have statefulness issues (for things like simplefb).

* Build a new DTB by flattening the current live tree. This would rely
  on drivers that modify state to patch the tree appropriately.

Both of those are somewhat ugly. :(

[...]

> +/**
> + * kexec_cpu_info_init - Initialize an array of kexec_cpu_info structures.
> + *
> + * Allocates a cpu info array and fills it with info for all cpus found in
> + * the device tree passed.
> + */
> +
> +static int kexec_cpu_info_init(const struct device_node *dn,
> +       struct kexec_boot_info *info)
> +{
> +       int result;
> +       unsigned int cpu;
> +
> +       info->cp = kmalloc(
> +               info->cpu_count * sizeof(*info->cp), GFP_KERNEL);
> +
> +       if (!info->cp) {
> +               pr_err("%s: Error: Out of memory.", __func__);
> +               return -ENOMEM;
> +       }
> +
> +       for (cpu = 0; cpu < info->cpu_count; cpu++) {
> +               struct cpu_properties *cp = &info->cp[cpu];
> +
> +               dn = of_find_node_by_type((struct device_node *)dn, "cpu");
> +
> +               if (!dn) {
> +                       pr_devel("%s:%d: bad node\n", __func__, __LINE__);
> +                       goto on_error;
> +               }
> +
> +               result = read_cpu_properties(cp, dn);
> +
> +               if (result) {
> +                       pr_devel("%s:%d: bad node\n", __func__, __LINE__);
> +                       goto on_error;
> +               }
> +
> +               if (cp->type == cpu_enable_method_psci)
> +                       pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s'\n",
> +                               __func__, __LINE__, cpu, cp->hwid,
> +                               cp->enable_method);
> +               else
> +                       pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s', "
> +                               "cpu-release-addr %llx\n",
> +                               __func__, __LINE__, cpu, cp->hwid,
> +                               cp->enable_method,
> +                               cp->cpu_release_addr);
> +       }
> +
> +       return 0;
> +
> +on_error:
> +       kfree(info->cp);
> +       info->cp = NULL;
> +       return -EINVAL;
> +}

I don't see why we should need this at all. If we use the hotplug
infrastructure, we don't need access to the enable-method and related
properties, and the kexec code need only deal with a single CPU.

The only case where kexec needs to deal with other CPUs is when some are
sat in the holding pen, but this code doesn't seem to handle that.

As I believe I mentioned before, we should be able to extend the holding
pen code to get those CPUs to increment a sat-in-pen counter, and if
that's non-zero after SMP bringup we print a warning (and disallow
kexec).
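
Something like this, say (a sketch; the counter and helper are
invented):

	/* Hypothetical: incremented by each CPU left in the holding pen. */
	static atomic_t cpus_stuck_in_pen = ATOMIC_INIT(0);

	/* Called after SMP bringup, and again when a kexec is loaded. */
	static int kexec_check_holding_pen(void)
	{
		int stuck = atomic_read(&cpus_stuck_in_pen);

		if (stuck) {
			pr_warn("kexec: %d CPU(s) stuck in holding pen\n",
				stuck);
			return -EBUSY;
		}

		return 0;
	}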

[...]

> +/**
> +* kexec_compat_check - Iterator for kexec_cpu_check.
> +*/
> +
> +static int kexec_compat_check(const struct kexec_ctx *ctx)
> +{
> +       unsigned int cpu_1;
> +       unsigned int to_process;
> +
> +       to_process = min(ctx->first.cpu_count, ctx->second.cpu_count);
> +
> +       if (ctx->first.cpu_count != ctx->second.cpu_count)
> +               pr_warn("%s: Warning: CPU count mismatch %u != %u.\n",
> +                       __func__, ctx->first.cpu_count, ctx->second.cpu_count);
> +
> +       for (cpu_1 = 0; cpu_1 < ctx->first.cpu_count; cpu_1++) {
> +               unsigned int cpu_2;
> +               struct cpu_properties *cp_1 = &ctx->first.cp[cpu_1];
> +
> +               for (cpu_2 = 0; cpu_2 < ctx->second.cpu_count; cpu_2++) {
> +                       struct cpu_properties *cp_2 = &ctx->second.cp[cpu_2];
> +
> +                       if (cp_1->hwid != cp_2->hwid)
> +                               continue;
> +
> +                       if (!kexec_cpu_check(cp_1, cp_2))
> +                               return -EINVAL;
> +
> +                       to_process--;
> +               }
> +       }
> +
> +       if (to_process) {
> +               pr_warn("%s: Warning: Failed to process %u CPUs.\n", __func__,
> +                       to_process);
> +               return -EINVAL;
> +       }
> +
> +       return 0;
> +}

I don't see the point in checking this in the kernel. If I pass the
second stage kernel a new dtb where my enable methods are different,
that was my choice as a user. If that doesn't work, that's my fault.

There are plenty of other things that might be completely different that
we don't sanity check, so I don't see why enable methods should be any
different.

[...]

> +/**
> + * kexec_check_cpu_die -  Check if cpu_die() will work on secondary processors.
> + */
> +
> +static int kexec_check_cpu_die(void)
> +{
> +       unsigned int cpu;
> +       unsigned int sum = 0;
> +
> +       /* For simplicity this also checks the primary CPU. */
> +
> +       for_each_cpu(cpu, cpu_all_mask) {
> +               if (cpu && (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_disable ||
> +                       cpu_ops[cpu]->cpu_disable(cpu))) {
> +                       sum++;
> +                       pr_err("%s: Error: "
> +                               "CPU %u does not support hot un-plug.\n",
> +                               __func__, cpu);
> +               }
> +       }
> +
> +       return sum ? -EOPNOTSUPP : 0;
> +}

We should really use disable_nonboot_cpus() for this. That way we don't
end up with a slightly different hotplug implementation for kexec. The
above is missing cpu_kill calls, for example, and I'm worried by the
possibility of further drift over time.

I understand from our face-to-face discussion that you didn't want to
require the PM infrastructure that disable_nonboot_cpus currently pulls
in due to it being dependent on CONFIG_PM_SLEEP_SMP, which selects
CONFIG_PM_SLEEP and so on. The solution to that is to refactor the
Kconfig so we can have disable_nonboot_cpus without all the other PM
infrastructure.

> +
> +/**
> + * machine_kexec_prepare - Prepare for a kexec reboot.
> + *
> + * Called from the core kexec code when a kernel image is loaded.
> + */
> +
> +int machine_kexec_prepare(struct kimage *image)
> +{
> +       int result;

This seems to always be an error code. Call it 'err'.

> +       dtb_t *dtb = NULL;
> +       struct kexec_ctx *ctx;
> +       const struct kexec_segment *dtb_seg;
> +
> +       kexec_image_info(image);
> +
> +       result = kexec_check_cpu_die();
> +
> +       if (result)
> +               goto on_error;
> +
> +       result = kexec_ctx_alloc(image);
> +
> +       if (result)
> +               goto on_error;
> +
> +       ctx = kexec_image_to_ctx(image);
> +
> +       result = kexec_boot_info_init(&ctx->first, NULL);
> +
> +       if (result)
> +               goto on_error;
> +
> +       dtb_seg = kexec_find_dtb_seg(image);
> +
> +       if (!dtb_seg) {
> +               result = -EINVAL;
> +               goto on_error;
> +       }
> +
> +       result = kexec_copy_dtb(dtb_seg, &dtb);
> +
> +       if (result)
> +               goto on_error;
> +
> +       result = kexec_boot_info_init(&ctx->second, dtb);
> +
> +       if (result)
> +               goto on_error;
> +
> +       result = kexec_compat_check(ctx);
> +
> +       if (result)
> +               goto on_error;
> +
> +       kexec_dtb_addr = dtb_seg->mem;
> +       kexec_kimage_start = image->start;
> +
> +       goto on_exit;
> +
> +on_error:
> +       kexec_ctx_clean(image);
> +on_exit:

on_* looks weird, and doesn't match the style of other labels in
arch/arm64. Could we call these 'out_clean' and 'out' instead?

> +       kfree(dtb);
> +       return result;
> +}
> +
> +/**
> + * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
> + */
> +
> +static void kexec_list_flush_cb(void *ctx , unsigned int flag,
> +       void *addr, void *dest)
> +{
> +       switch (flag) {
> +       case IND_INDIRECTION:
> +       case IND_SOURCE:
> +               __flush_dcache_area(addr, PAGE_SIZE);

Is PAGE_SIZE always big enough? Do we not have a more accurate size?

Perhaps I've misunderstood what's going on here.

> +               break;
> +       default:
> +               break;
> +       }
> +}
> +
> +/**
> + * machine_kexec - Do the kexec reboot.
> + *
> + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> + */
> +
> +void machine_kexec(struct kimage *image)
> +{
> +       phys_addr_t reboot_code_buffer_phys;
> +       void *reboot_code_buffer;
> +       struct kexec_ctx *ctx = kexec_image_to_ctx(image);
> +
> +       BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);

It looks like relocate_new_kernel_size is a build-time constant. If we
need that to be less than KEXEC_CONTROL_PAGE_SIZE, then we should make
that a build-time check.

> +       BUG_ON(num_online_cpus() > 1);
> +       BUG_ON(!ctx);
> +
> +       kexec_image_info(image);
> +
> +       kexec_kimage_head = image->head;
> +
> +       reboot_code_buffer_phys = page_to_phys(image->control_code_page);
> +       reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> +
> +       pr_devel("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
> +               (void *)image->control_code_page);

This is already a pointer. Is the cast to void necessary?

> +       pr_devel("%s:%d: reboot_code_buffer_phys:  %p\n", __func__, __LINE__,
> +               (void *)reboot_code_buffer_phys);

Use %pa and pass &reboot_code_buffer_phys, no cast necessary.
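
i.e.:

	pr_devel("%s:%d: reboot_code_buffer_phys:  %pa\n", __func__, __LINE__,
		&reboot_code_buffer_phys);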

> +       pr_devel("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
> +               reboot_code_buffer);
> +       pr_devel("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
> +               relocate_new_kernel);
> +       pr_devel("%s:%d: relocate_new_kernel_size: %lxh(%lu) bytes\n", __func__,
> +               __LINE__, relocate_new_kernel_size, relocate_new_kernel_size);

Please use a '0x' prefix rather than an 'h' suffix. Do we need to print
in both hex and decimal?

> +
> +       pr_devel("%s:%d: kexec_dtb_addr:           %p\n", __func__, __LINE__,
> +               (void *)kexec_dtb_addr);
> +       pr_devel("%s:%d: kexec_kimage_head:        %p\n", __func__, __LINE__,
> +               (void *)kexec_kimage_head);
> +       pr_devel("%s:%d: kexec_kimage_start:       %p\n", __func__, __LINE__,
> +               (void *)kexec_kimage_start);

These are all unsigned long, so why not use the existing mechanism for
printing unsigned long?
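
i.e. something like:

	pr_devel("%s:%d: kexec_kimage_start:       %lx\n", __func__, __LINE__,
		kexec_kimage_start);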

> +
> +       /*
> +        * Copy relocate_new_kernel to the reboot_code_buffer for use
> +        * after the kernel is shut down.
> +        */
> +
> +       memcpy(reboot_code_buffer, relocate_new_kernel,
> +               relocate_new_kernel_size);
> +
> +       /* Assure reboot_code_buffer is copied. */
> +
> +       mb();

I don't think we need the mb if this is only to guarantee completion
before the cache flush -- cacheable memory accesses should hazard
against cache flushes by VA.

> +
> +       pr_info("Bye!\n");
> +
> +       local_disable(DAIF_ALL);

We can move these two right before the soft_restart, after the cache
maintenance. That way the print is closer to the exit of the current
kernel.

> +
> +       /* Flush the reboot_code_buffer in preparation for its execution. */
> +
> +       __flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
> +
> +       /* Flush the kimage list. */
> +
> +       kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
> +
> +       soft_restart(reboot_code_buffer_phys);
> +}
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +       /* Empty routine needed to avoid build errors. */
> +}
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> new file mode 100644
> index 0000000..92aba9d
> --- /dev/null
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -0,0 +1,185 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <asm/assembler.h>
> +#include <asm/memory.h>
> +#include <asm/page.h>
> +
> +/* The list entry flags. */
> +
> +#define IND_DESTINATION_BIT 0
> +#define IND_INDIRECTION_BIT 1
> +#define IND_DONE_BIT        2
> +#define IND_SOURCE_BIT      3

Given these need to match the existing IND_* flags in
include/linux/kexec.h, and they aren't in any way specific to arm64,
please put these in an asm-generic header and redefine the existing
IND_* flags in terms of them.
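
Roughly (the header name is invented for illustration):

	/* include/asm-generic/kexec-flags.h (hypothetical): */
	#define IND_DESTINATION_BIT	0
	#define IND_INDIRECTION_BIT	1
	#define IND_DONE_BIT		2
	#define IND_SOURCE_BIT		3

	/* include/linux/kexec.h, redefined in terms of the bit numbers: */
	#define IND_DESTINATION		(1 << IND_DESTINATION_BIT)
	#define IND_INDIRECTION		(1 << IND_INDIRECTION_BIT)
	#define IND_DONE		(1 << IND_DONE_BIT)
	#define IND_SOURCE		(1 << IND_SOURCE_BIT)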

> +
> +/*
> + * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
> + *
> + * The memory that the old kernel occupies may be overwritten when copying the
> + * new kernel to its final location.  To assure that the relocate_new_kernel
> + * routine which does that copy is not overwritten all code and data needed
> + * by relocate_new_kernel must be between the symbols relocate_new_kernel and
> + * relocate_new_kernel_end.  The machine_kexec() routine will copy
> + * relocate_new_kernel to the kexec control_code_page, a special page which
> + * has been set up to be preserved during the kernel copy operation.
> + */
> +
> +.align 3

Surely this isn't necessary?

> +
> +.globl relocate_new_kernel
> +relocate_new_kernel:
> +
> +       /* Setup the list loop variables. */
> +
> +       ldr     x10, kexec_kimage_head          /* x10 = list entry */

Any reason for using x10 rather than starting with x0? Or x18, if you
need to preserve the low registers?

> +
> +       mrs     x0, ctr_el0
> +       ubfm    x0, x0, #16, #19
> +       mov     x11, #4
> +       lsl     x11, x11, x0                    /* x11 = dcache line size */

Any reason we can't use dcache_line_size, given it's a macro?

> +
> +       mov     x12, xzr                        /* x12 = segment start */
> +       mov     x13, xzr                        /* x13 = entry ptr */
> +       mov     x14, xzr                        /* x14 = copy dest */
> +
> +       /* Check if the new kernel needs relocation. */
> +
> +       cbz     x10, .Ldone
> +       tbnz    x10, IND_DONE_BIT, .Ldone
> +
> +.Lloop:

Is there any reason for the '.L' on all of these? We only seem to do
that in the lib code that was imported from elsewhere, and it doesn't
match the rest of the arm64 asm.

> +       and     x15, x10, PAGE_MASK             /* x15 = addr */
> +
> +       /* Test the entry flags. */
> +
> +.Ltest_source:
> +       tbz     x10, IND_SOURCE_BIT, .Ltest_indirection
> +
> +       /* copy_page(x20 = dest, x21 = src) */
> +
> +       mov x20, x14
> +       mov x21, x15
> +
> +1:     ldp     x22, x23, [x21]
> +       ldp     x24, x25, [x21, #16]
> +       ldp     x26, x27, [x21, #32]
> +       ldp     x28, x29, [x21, #48]
> +       add     x21, x21, #64
> +       stnp    x22, x23, [x20]
> +       stnp    x24, x25, [x20, #16]
> +       stnp    x26, x27, [x20, #32]
> +       stnp    x28, x29, [x20, #48]
> +       add     x20, x20, #64
> +       tst     x21, #(PAGE_SIZE - 1)
> +       b.ne    1b

It's a shame we can't reuse copy_page directly. Could we not move the
body to a macro we can reuse here?

> +
> +       /* dest += PAGE_SIZE */
> +
> +       add     x14, x14, PAGE_SIZE
> +       b       .Lnext
> +
> +.Ltest_indirection:
> +       tbz     x10, IND_INDIRECTION_BIT, .Ltest_destination
> +
> +       /* ptr = addr */
> +
> +       mov     x13, x15
> +       b       .Lnext
> +
> +.Ltest_destination:
> +       tbz     x10, IND_DESTINATION_BIT, .Lnext
> +
> +       /* flush segment */
> +
> +       bl      .Lflush
> +       mov     x12, x15
> +
> +       /* dest = addr */
> +
> +       mov     x14, x15
> +
> +.Lnext:
> +       /* entry = *ptr++ */
> +
> +       ldr     x10, [x13]
> +       add     x13, x13, 8

This can be:

	ldr	x10, [x13], #8

> +
> +       /* while (!(entry & DONE)) */
> +
> +       tbz     x10, IND_DONE_BIT, .Lloop
> +
> +.Ldone:
> +       /* flush last segment */
> +
> +       bl      .Lflush
> +
> +       dsb     sy
> +       isb
> +       ic      ialluis

This doesn't look right; we need a dsb and an isb after the instruction
cache maintenance (or the icache could still be flushing when we branch
to the new kernel).

> +
> +       /* start_new_kernel */
> +
> +       ldr     x4, kexec_kimage_start
> +       ldr     x0, kexec_dtb_addr
> +       mov     x1, xzr
> +       mov     x2, xzr
> +       mov     x3, xzr
> +       br      x4
> +
> +/* flush - x11 = line size, x12 = start addr, x14 = end addr. */
> +
> +.Lflush:
> +       cbz     x12, 2f
> +       mov     x0, x12
> +       sub     x1, x11, #1
> +       bic     x0, x0, x1
> +1:     dc      civac, x0
> +       add     x0, x0, x11
> +       cmp     x0, x14
> +       b.lo    1b
> +2:     ret

It would be nice if this were earlier in the file, before its callers.

> +
> +.align 3

We should have a comment as to why this is needed (to keep the 64-bit
values below naturally aligned).

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 01/13] arm64: Add ESR_EL2_EC macros to hyp-stub
  2014-09-15 16:10     ` Mark Rutland
@ 2014-09-22 21:45       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-22 21:45 UTC (permalink / raw)
  To: linux-arm-kernel


Hi Mark,

On Mon, 2014-09-15 at 17:10 +0100, Mark Rutland wrote:
> On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > +#define ESR_EL2_EC_SHIFT	26
> > +#define ESR_EL2_EC_HVC64	0x16
> 
> These exist in arch/arm64/include/asm/kvm_arm.h, no?
> 
> If anything that should be folded into arch/arm64/include/asm/esr.h...

OK, I'll remove this patch and add the esr.h fixup into my generic
'arm64 enhancements' series.

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 03/13] arm64: Convert hcalls to use ISS field
  2014-09-15 17:57     ` Mark Rutland
@ 2014-09-22 21:46       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-22 21:46 UTC (permalink / raw)
  To: linux-arm-kernel


Hi Mark,

On Mon, 2014-09-15 at 18:57 +0100, Mark Rutland wrote:
> On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > +/*
> > + * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
> > + */
> > +
> > +#define HVC_GET_VECTORS 1
> > +
> > +/*
> > + * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
> > + *
> > + * @x0: Physical address of the new vector table.
> > + */
> > +
> > +#define HVC_SET_VECTORS 2
> > +
> > +/*
> > + * HVC_KVM_CALL_HYP - Execute kvm_call_hyp routine.
> > + */
> > +
> > +#define HVC_KVM_CALL_HYP 3
> 
> If this can be used without KVM (e.g. in the hyp stub) I'd just call
> this HVC_CALL_HYP, or the name will be a little misleading.

Yes, it is more or less generic, so we could have it as HVC_CALL_HYP.

> >  #ifndef __ASSEMBLY__
> >  
> >  /*
> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > index 2d960a9..9ab5f70 100644
> > --- a/arch/arm64/kernel/hyp-stub.S
> > +++ b/arch/arm64/kernel/hyp-stub.S
> > @@ -54,16 +54,29 @@ ENDPROC(__hyp_stub_vectors)
> >  
> >  #define ESR_EL2_EC_SHIFT	26
> >  #define ESR_EL2_EC_HVC64	0x16
> > +#define ESR_EL2_ISS		0xffff
> 
> The last patch tried to add an identical macro to a header file. Can we
> use that header please?

As I mentioned in my reply to your comment on that patch, I'll prepare a
separate header/macro cleanup patch.

> >  
> >  el1_sync:
> > -	mrs	x1, esr_el2
> > -	lsr	x1, x1, #ESR_EL2_EC_SHIFT
> > -	cmp	x1, #ESR_EL2_EC_HVC64
> > -	b.ne	2f				// Not an HVC trap
> > -	cbz	x0, 1f
> > -	msr	vbar_el2, x0			// Set vbar_el2
> > +	mrs	x10, esr_el2
> 
> Any reason for using x10?
> 
> If we want to preserve the lowest register numbers, start with the
> highest caller-saved register numbers (i.e. x18). At least for me it
> makes the code far easier to read; it doesn't make it look like x10 is
> special.

OK, sure.

> > +	lsr	x9, x10, #ESR_EL2_EC_SHIFT	// x9=EC
> > +	and	x10, x10, #ESR_EL2_ISS		// x10=ISS
> 
> The mnemonics make these comments redundant.
> 
> > +	cmp     x9, #ESR_EL2_EC_HVC64
> > +	b.ne    2f                              // Not a host HVC trap
> 
> Now that we have the nice mnemonic, we could get rid of the comment
> here. I'd drop the 'host' from the comment; it wasn't there originally
> and it's somewhat meaningless for the stub (KVM isn't up yet, and only
> the native OS can make an HVC).

Sure, I copied this from the KVM vector so they would be more similar.

> 
> > +	mrs     x9, vttbr_el2
> > +	cbnz    x9, 2f                          // Not a host HVC trap
> 
> I don't understand this. When is vttbr_el2 non-zero, and why do we want
> to silently return from a HVC in that case? That didn't seem to be the
> case in the original code.

No, it is not in the original.  I copied this from the KVM vector so they
would be more similar.

> > +
> > +	cmp	x10, #HVC_GET_VECTORS
> > +	b.ne	1f
> > +	mrs	x0, vbar_el2
> >  	b	2f
> > -1:	mrs	x0, vbar_el2			// Return vbar_el2
> > +
> > +1:	cmp	x10, #HVC_SET_VECTORS
> > +	b.ne	1f
> > +	msr	vbar_el2, x0
> > +
> > +1:
> 
> It feels like we should explode if we ever reach here from the host --
> if we've made an unsupported HVC we really want to know that we've done
> so.

Sure, I can put something in, but I would think that whoever tries a bad
hcall is going to find out it doesn't work regardless.

Do you have a good idea of how we can do this 'explode'?

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 06/13] arm64: Add new routine read_cpu_properties
  2014-09-15 18:42     ` Mark Rutland
@ 2014-09-25  0:23       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Mon, 2014-09-15 at 19:42 +0100, Mark Rutland wrote:
> Hi Geoff,
> 
> On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > The kexec re-boot support that will be added in a subsequent patch in this
> > series will need to read the device tree CPU properties, and it is expected
> > that a rework of the SMP spin table code to handle cpu_die will also need this
> > functionality, so add two new common arm64 files cpu-properties.h and
> > cpu-properties.c that define a new structure cpu_properties that hold the
> > various CPU properties from a device tree, and the new routine
> > read_cpu_properties() that fills the structure from a device tree CPU node.
> 
> I'm very much not keen on placing all this information in a single
> structure -- that adds a new tight coupling that we didn't have before,
> and it looks like it's going to be painful to maintain.
> 
> If kexec uses the existing high-level hotplug infrastructure, it has no
> reason to go anywhere near this information.
> 
> So I really don't like this patch.

I decided to just strip all the checking out of the kernel with the
expectation that the user space program will set things up correctly,
so this patch is no longer needed.

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 07/13] arm64: Add new routine local_disable
  2014-09-15 18:56     ` Mark Rutland
@ 2014-09-25  0:24       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-25  0:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Mon, 2014-09-15 at 19:56 +0100, Mark Rutland wrote:
> Hi Geoff,
> 
> On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > Add the new arm64 routine local_disable() to allow the masking of several DAIF
> > flags in one operation.  Currently, we only have routines to mask individual
> > flags, and to mask several flags multiple calls to daifset are required.
> >
> > Signed-off-by: Geoff Levand <geoff@infradead.org>
> > ---
> >  arch/arm64/include/asm/irqflags.h | 13 +++++++++++++
> >  1 file changed, 13 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> > index 11cc941..28521d4 100644
> > --- a/arch/arm64/include/asm/irqflags.h
> > +++ b/arch/arm64/include/asm/irqflags.h
> > @@ -113,5 +113,18 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
> >  #define local_dbg_enable()	asm("msr	daifclr, #8" : : : "memory")
> >  #define local_dbg_disable()	asm("msr	daifset, #8" : : : "memory")
> >  
> > +enum daif_flag {
> 
> Is there any reason for this to be an enum rather than a set of
> #defines?
> 
> It would be nice to be able to use these in asm.
> 
> > +	DAIF_FIQ   = (1UL << 6),
> > +	DAIF_IRQ   = (1UL << 7),
> > +	DAIF_ASYNC = (1UL << 8),
> > +	DAIF_DBG   = (1UL << 9),
> > +	DAIF_ALL   = (0xffUL << 6),
> 
> Not 0xf?
> 
> It would be nicer to OR the other flags.
> 
> > +};
> > +
> > +static inline void local_disable(unsigned long daif_flags)
> > +{
> > +	arch_local_irq_restore(daif_flags | arch_local_save_flags());
> 
> If we knew the value was a constant (which we could check with
> __builting_constant_p) we could use daifset here, rather than having a
> RMW sequence.
> 
> With that, the other local_*_{enable,disable} calls could be rewritten
> in terms of this, without affecting the generated code. That would
> require some shifting to account for the difference between pstate and
> daif{clr,set} layout, but for constants that shouldn't be a problem.

Since kexec just needs to mask all the DAIF exceptions before the main
CPU is reset I'll just do that inline, and put this rework of the
exception mask routines on my todo list.
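
For reference, that inline masking is a single instruction (DAIFSet bits:
D=8, A=4, I=2, F=1):

	msr	daifset, #0xf		// mask debug, SError, IRQ and FIQ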

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-15 18:11     ` Mark Rutland
@ 2014-09-25  0:24       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-25  0:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Mon, 2014-09-15 at 19:11 +0100, Mark Rutland wrote:
> On Tue, Sep 09, 2014 at 11:49:04PM +0100, Geoff Levand wrote:
> > Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
> > During CPU reset the CPU must be brought to the exception level it had on
> > entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
> > needed for this exception level switch.
> > 
> > Signed-off-by: Geoff Levand <geoff@infradead.org>
> > ---
> >  arch/arm64/include/asm/virt.h | 11 +++++++++++
> >  arch/arm64/kernel/hyp-stub.S  | 10 ++++++++++
> >  2 files changed, 21 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> > index 894fe53..b217fbc 100644
> > --- a/arch/arm64/include/asm/virt.h
> > +++ b/arch/arm64/include/asm/virt.h
> > @@ -41,6 +41,17 @@
> >  
> >  #define HVC_KVM_CALL_HYP 3
> >  
> > +/*
> > + * HVC_CALL_FUNC - Execute a function at EL2.
> > + *
> > + * @x0: Physical address of the function to be executed.
> > + * @x1: Passed as the first argument to @fn.
> > + * @x2: Passed as the second argument to @fn.
> > + * @x3: Passed as the third argument to @fn.
> > + */
> > +
> > +#define HVC_CALL_FUNC 4
> > +
> 
> Can't we use the HVC_KVM_CALL_HYP for this as well? I thought we already
> added the code to the stub to do that in the last patch.

The last patch (arm64: Convert hcalls to use ISS field) only added
function call support to the kvm vector.

> Is there a difference between the two that I'm missing?

The kvm and stub el1_sync vectors are different in that the kvm vector
expects a stack to be set up whereas the stub vector does not.  We have
plenty of hcall numbers, so I thought it better to have two different
numbers so improper usage would be easier to detect.  

> >  #ifndef __ASSEMBLY__
> >  
> >  /*
> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > index 9ab5f70..a21cf51 100644
> > --- a/arch/arm64/kernel/hyp-stub.S
> > +++ b/arch/arm64/kernel/hyp-stub.S
> > @@ -75,7 +75,17 @@ el1_sync:
> >  1:	cmp	x10, #HVC_SET_VECTORS
> >  	b.ne	1f
> >  	msr	vbar_el2, x0
> > +	b	2f
> >  
> > +1:	cmp	x10, #HVC_CALL_FUNC
> > +	b.ne    1f
> > +	mov	x29, lr
> 
> What's the contract for functions we call through the stub?

My thinking was that this is just a mechanism to do the call.
The caller and callee would need to set the conditions of the
call.  The only restriction is that the called function would
need to preserve the register that lr is saved in (x29 here,
but x18 in the current implementation).
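
As an illustrative sketch of that contract (the x29 stash is from the
quoted patch; the x9 scratch register and the return path here are my
assumptions, not the final code):

	mov	x29, lr			// stash lr; fn must preserve x29
	mov	x9, x0			// x0 holds the function address
	mov	x0, x1			// shift the three arguments down
	mov	x1, x2
	mov	x2, x3
	blr	x9			// call fn at EL2
	mov	lr, x29			// restore lr before the eret path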

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 08/13] arm64: Use cpu_ops for smp_stop
  2014-09-15 19:06     ` Mark Rutland
@ 2014-09-25  0:24       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-25  0:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Mon, 2014-09-15 at 20:06 +0100, Mark Rutland wrote:
> Hi Geoff,
> 
> On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:
> > The current implementation of ipi_cpu_stop() is just a tight infinite loop
> > around cpu_relax().  This infinite loop implementation is OK if the machine
> > will soon do a poweroff, but it doesn't have any mechanism to allow a CPU
> > to be brought back on-line, nor is it compatible with kexec re-boot.
> 
> I don't see why we should use this when we have disable_nonboot_cpus.

I decided to use disable_nonboot_cpus, so this patch is no longer
needed.  I'll put the rework of disable_nonboot_cpus to not depend on
CONFIG_PM_SLEEP_SMP on my todo list.

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 11/13] arm64/kexec: Add core kexec support
  2014-09-18  1:13     ` Mark Rutland
@ 2014-09-25  0:25       ` Geoff Levand
  -1 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-25  0:25 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Thu, 2014-09-18 at 02:13 +0100, Mark Rutland wrote:
> On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:

> > +++ b/arch/arm64/include/asm/kexec.h
> > @@ -0,0 +1,52 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#if !defined(_ARM64_KEXEC_H)
> > +#define _ARM64_KEXEC_H
> > +
> > +/* Maximum physical address we can use pages from */
> > +
> > +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> > +
> > +/* Maximum address we can reach in physical address mode */
> > +
> > +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> > +
> > +/* Maximum address we can use for the control code buffer */
> > +
> > +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> > +
> 
> What are these used for? I see that other architectures seem to do the
> same thing, but they look odd.

They need to be defined for the core kexec code, but arm64
doesn't use them.

> > +#define KEXEC_CONTROL_PAGE_SIZE        4096
> 
> What's this used for?

This is the size reserved for the reboot_code_buffer defined in
kexec's core code.  For arm64, we copy our relocate_new_kernel
routine into the reboot_code_buffer.

> Does this work with 64k pages, and is there any reason we can't figure
> out the actual size of the code (so we don't get bitten if it grows)?

Kexec will reserve pages to satisfy KEXEC_CONTROL_PAGE_SIZE, so for
all arm64 page configs one page will be reserved for this value (4096).

In the latest implementation I check that relocate_new_kernel is not too
big with a '.org KEXEC_CONTROL_PAGE_SIZE' directive.
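
Assuming relocate_new_kernel sits at the start of its section, that check
amounts to:

	.org	KEXEC_CONTROL_PAGE_SIZE	/* assembler errors out if the code above outgrows this */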

> > +
> > +#define KEXEC_ARCH KEXEC_ARCH_ARM64
> > +
> > +#define ARCH_HAS_KIMAGE_ARCH
> > +
> > +#if !defined(__ASSEMBLY__)
> > +
> > +struct kimage_arch {
> > +       void *ctx;
> > +};
> > +
> > +/**
> > + * crash_setup_regs() - save registers for the panic kernel
> > + *
> > + * @newregs: registers are saved here
> > + * @oldregs: registers to be saved (may be %NULL)
> > + */
> > +
> > +static inline void crash_setup_regs(struct pt_regs *newregs,
> > +                                   struct pt_regs *oldregs)
> > +{
> > +}
> 
> It would be nice to know what we're going to do for this.
> 
> Is this a required function, or can we get away without crash kernel
> support for the moment?

This is just to avoid a build error.  It is not used for kexec re-boot.

> > +
> > +#endif /* !defined(__ASSEMBLY__) */
> > +
> > +#endif
> > diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> > index df7ef87..8b7c029 100644
> > --- a/arch/arm64/kernel/Makefile
> > +++ b/arch/arm64/kernel/Makefile
> > @@ -29,6 +29,8 @@ arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
> >  arm64-obj-$(CONFIG_JUMP_LABEL)         += jump_label.o
> >  arm64-obj-$(CONFIG_KGDB)               += kgdb.o
> >  arm64-obj-$(CONFIG_EFI)                        += efi.o efi-stub.o efi-entry.o
> > +arm64-obj-$(CONFIG_KEXEC)              += machine_kexec.o relocate_kernel.o    \
> > +                                          cpu-properties.o
> >
> >  obj-y                                  += $(arm64-obj-y) vdso/
> >  obj-m                                  += $(arm64-obj-m)
> > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> > new file mode 100644
> > index 0000000..043a3bc
> > --- /dev/null
> > +++ b/arch/arm64/kernel/machine_kexec.c
> > @@ -0,0 +1,612 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/kexec.h>
> > +#include <linux/of_fdt.h>
> > +#include <linux/slab.h>
> > +#include <linux/uaccess.h>
> > +
> > +#include <asm/cacheflush.h>
> > +#include <asm/cpu_ops.h>
> > +#include <asm/system_misc.h>
> > +
> > +#include "cpu-properties.h"
> > +
> > +#if defined(DEBUG)
> > +static const int debug = 1;
> > +#else
> > +static const int debug;
> > +#endif
> 
> I don't think we need this.

I put the debug output into a separate patch; I'll decide later whether
to post it.

> > +
> > +typedef struct dtb_buffer {char b[0]; } dtb_t;
> 
> It would be nice for this to be consistent with other dtb uses; if we
> need a dtb type then it shouldn't be specific to kexec.

This was to avoid errors due to the lack of type checking with
void* types.  I've reworked this in the latest patch.

> > +static struct kexec_ctx *current_ctx;
> > +
> > +static int kexec_ctx_alloc(struct kimage *image)
> > +{
> > +       BUG_ON(image->arch.ctx);
> > +
> > +       image->arch.ctx = kmalloc(sizeof(struct kexec_ctx), GFP_KERNEL);
> > +
> > +       if (!image->arch.ctx)
> > +               return -ENOMEM;
> > +
> > +       current_ctx = (struct kexec_ctx *)image->arch.ctx;
> 
> This seems to be the only use of current_ctx. I take it this is a
> leftover from debugging?
> 
> [...]
> 
> > +/**
> > + * kexec_list_walk - Helper to walk the kimage page list.
> > + */
> 
> Please keep this associated with the function it refers to (nothing
> should be between this comment and the function prototype).
> 
> > +
> > +#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
> 
> Can't this live in include/linux/kexec.h, where these flags are defined.

I have a kexec patch submitted to clean this up.  I'll re-factor
this when that patch is upstream.

  https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-August/120368.html

> The meaning of these doesn't seem to be documented anywhere. Would you
> be able to explain what each of these means?

I think lack of comments/documentation is a general weakness of the
core kexec code.

> > +static void kexec_list_walk(void *ctx, unsigned long kimage_head,
> > +       void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
> > +{
> > +       void *dest;
> > +       unsigned long *entry;
> > +
> > +       for (entry = &kimage_head, dest = NULL; ; entry++) {
> > +               unsigned int flag = *entry & IND_FLAGS;
> > +               void *addr = phys_to_virt(*entry & PAGE_MASK);
> > +
> > +               switch (flag) {
> > +               case IND_INDIRECTION:
> > +                       entry = (unsigned long *)addr - 1;
> > +                       cb(ctx, flag, addr, NULL);
> > +                       break;
> > +               case IND_DESTINATION:
> > +                       dest = addr;
> > +                       cb(ctx, flag, addr, NULL);
> > +                       break;
> > +               case IND_SOURCE:
> > +                       cb(ctx, flag, addr, dest);
> > +                       dest += PAGE_SIZE;
> 
> I really don't understand what's going on with dest here, but that's
> probably because I don't understand the meaning of the flags.

IND_SOURCE means the entry is a page of the current segment, and dest is
the destination address that page will be copied to.  Each time a source
page is processed the destination is post-incremented.  Think
foo(src, dest++).

> > +                       break;
> > +               case IND_DONE:
> > +                       cb(ctx, flag , NULL, NULL);
> > +                       return;
> > +               default:
> > +                       pr_devel("%s:%d unknown flag %xh\n", __func__, __LINE__,
> > +                               flag);
> 
> Wouldn't pr_warn would be more appropriate here?

We don't really need a message since the IND_ flags are well
established.  I'll remove this.

> 
> > +                       cb(ctx, flag, addr, NULL);
> > +                       break;
> > +               }
> > +       }
> > +}
> > +
> > +/**
> > + * kexec_image_info - For debugging output.
> > + */
> > +
> > +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
> > +static void _kexec_image_info(const char *func, int line,
> > +       const struct kimage *image)
> > +{
> > +       if (debug) {
> > +               unsigned long i;
> > +
> > +               pr_devel("%s:%d:\n", func, line);
> > +               pr_devel("  kexec image info:\n");
> > +               pr_devel("    type:        %d\n", image->type);
> > +               pr_devel("    start:       %lx\n", image->start);
> > +               pr_devel("    head:        %lx\n", image->head);
> > +               pr_devel("    nr_segments: %lu\n", image->nr_segments);
> > +
> > +               for (i = 0; i < image->nr_segments; i++) {
> > +                       pr_devel("      segment[%lu]: %016lx - %016lx, "
> > +                               "%lxh bytes, %lu pages\n",
> > +                               i,
> > +                               image->segment[i].mem,
> > +                               image->segment[i].mem + image->segment[i].memsz,
> > +                               image->segment[i].memsz,
> > +                               image->segment[i].memsz /  PAGE_SIZE);
> > +
> > +                       if (kexec_is_dtb_user(image->segment[i].buf))
> > +                               pr_devel("        dtb segment\n");
> > +               }
> > +       }
> > +}
> 
> pr_devel is already dependent on DEBUG, so surely we don't need to check
> the debug variable?

I'm not sure how much of this would be removed as dead code.  If
the compiler is clever enough it all should be.

> > +
> > +/**
> > + * kexec_find_dtb_seg - Helper routine to find the dtb segment.
> > + */
> > +
> > +static const struct kexec_segment *kexec_find_dtb_seg(
> > +       const struct kimage *image)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < image->nr_segments; i++) {
> > +               if (kexec_is_dtb_user(image->segment[i].buf))
> > +                       return &image->segment[i];
> > +       }
> > +
> > +       return NULL;
> > +}
> 
> I'm really not keen on having the kernel guess which blobs need special
> treatment, though we seem to do that for arm.

Yes, to pass the dtb in x0 when the new kernel is entered.

> It would be far nicer if we could pass flags for each segment to
> describe what it is (e.g. kernel image, dtb, other binary blob), 

Well, we do pass a flag of sorts, the DT magic value.

> so we
> can do things like pass multiple DTBs (so we load two kernels at once
> and pass each a unique DTB if we want to boot a new kernel + crashkernel
> pair). Unfortunately that would require some fairly invasive rework of
> the kexec core.

I don't think I'll attempt that any time soon.  Feel free to
give it a try.

> For secureboot we can't trust a dtb from userspace, and will have to use
> kexec_file_load. To work with that we can either:
> 
> * Reuse the original DTB, patched with the new command line. This may
>   have statefulness issues (for things like simplefb).
> 
> * Build a new DTB by flattening the current live tree. This would rely
>   on drivers that modify state to patch the tree appropriately.

I have not yet looked into how to do this.

> [...]
> 
> > +/**
> > + * kexec_cpu_info_init - Initialize an array of kexec_cpu_info structures.
> > + *
> > + * Allocates a cpu info array and fills it with info for all cpus found in
> > + * the device tree passed.
> > + */
> > +
> > +static int kexec_cpu_info_init(const struct device_node *dn,
> > +       struct kexec_boot_info *info)
> > +{
> > +       int result;
> > +       unsigned int cpu;
> > +
> > +       info->cp = kmalloc(
> > +               info->cpu_count * sizeof(*info->cp), GFP_KERNEL);
> > +
> > +       if (!info->cp) {
> > +               pr_err("%s: Error: Out of memory.", __func__);
> > +               return -ENOMEM;
> > +       }
> > +
> > +       for (cpu = 0; cpu < info->cpu_count; cpu++) {
> > +               struct cpu_properties *cp = &info->cp[cpu];
> > +
> > +               dn = of_find_node_by_type((struct device_node *)dn, "cpu");
> > +
> > +               if (!dn) {
> > +                       pr_devel("%s:%d: bad node\n", __func__, __LINE__);
> > +                       goto on_error;
> > +               }
> > +
> > +               result = read_cpu_properties(cp, dn);
> > +
> > +               if (result) {
> > +                       pr_devel("%s:%d: bad node\n", __func__, __LINE__);
> > +                       goto on_error;
> > +               }
> > +
> > +               if (cp->type == cpu_enable_method_psci)
> > +                       pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s'\n",
> > +                               __func__, __LINE__, cpu, cp->hwid,
> > +                               cp->enable_method);
> > +               else
> > +                       pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s', "
> > +                               "cpu-release-addr %llx\n",
> > +                               __func__, __LINE__, cpu, cp->hwid,
> > +                               cp->enable_method,
> > +                               cp->cpu_release_addr);
> > +       }
> > +
> > +       return 0;
> > +
> > +on_error:
> > +       kfree(info->cp);
> > +       info->cp = NULL;
> > +       return -EINVAL;
> > +}
> 
> I don't see why we should need this at all. If we use the hotplug
> infrastructure, we don't need access to the enable-method and related
> properties, and the kexec code need only deal with a single CPU.

I removed all the checking in the latest patch.

> The only case where kexec needs to deal with other CPUs is when some are
> sat in the holding pen, but this code doesn't seem to handle that.
> 
> as I believe I mentioned before, we should be able to extend the holding
> pen code to get those CPUs to increment a sat-in-pen counter and if
> that's non-zero after SMP bringup we print a warning (and disallow
> kexec).

I have some work-in-progress patches that try to do this, but I will not
include those in this series.  See my spin-table branch:

  https://git.linaro.org/people/geoff.levand/linux-kexec.git

> > +/**
> > +* kexec_compat_check - Iterator for kexec_cpu_check.
> > +*/
> > +
> > +static int kexec_compat_check(const struct kexec_ctx *ctx)
> > +{
> > +       unsigned int cpu_1;
> > +       unsigned int to_process;
> > +
> > +       to_process = min(ctx->first.cpu_count, ctx->second.cpu_count);
> > +
> > +       if (ctx->first.cpu_count != ctx->second.cpu_count)
> > +               pr_warn("%s: Warning: CPU count mismatch %u != %u.\n",
> > +                       __func__, ctx->first.cpu_count, ctx->second.cpu_count);
> > +
> > +       for (cpu_1 = 0; cpu_1 < ctx->first.cpu_count; cpu_1++) {
> > +               unsigned int cpu_2;
> > +               struct cpu_properties *cp_1 = &ctx->first.cp[cpu_1];
> > +
> > +               for (cpu_2 = 0; cpu_2 < ctx->second.cpu_count; cpu_2++) {
> > +                       struct cpu_properties *cp_2 = &ctx->second.cp[cpu_2];
> > +
> > +                       if (cp_1->hwid != cp_2->hwid)
> > +                               continue;
> > +
> > +                       if (!kexec_cpu_check(cp_1, cp_2))
> > +                               return -EINVAL;
> > +
> > +                       to_process--;
> > +               }
> > +       }
> > +
> > +       if (to_process) {
> > +               pr_warn("%s: Warning: Failed to process %u CPUs.\n", __func__,
> > +                       to_process);
> > +               return -EINVAL;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> I don't see the point in checking this in the kernel. If I pass the
> second stage kernel a new dtb where my enable methods are different,
> that was my choice as a user. If that doesn't work, that's my fault.
> 
> There are plenty of other things that might be completely different that
> we don't sanity check, so I don't see why enable methods should be any
> different.
> 
> [...]
> 
> > +/**
> > + * kexec_check_cpu_die -  Check if cpu_die() will work on secondary processors.
> > + */
> > +
> > +static int kexec_check_cpu_die(void)
> > +{
> > +       unsigned int cpu;
> > +       unsigned int sum = 0;
> > +
> > +       /* For simplicity this also checks the primary CPU. */
> > +
> > +       for_each_cpu(cpu, cpu_all_mask) {
> > +               if (cpu && (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_disable ||
> > +                       cpu_ops[cpu]->cpu_disable(cpu))) {
> > +                       sum++;
> > +                       pr_err("%s: Error: "
> > +                               "CPU %u does not support hot un-plug.\n",
> > +                               __func__, cpu);
> > +               }
> > +       }
> > +
> > +       return sum ? -EOPNOTSUPP : 0;
> > +}
> 
> We should really use disable_nonboot_cpus() for this. That way we don't
> end up with a slightly different hotplug implementation for kexec. The
> above is missing cpu_kill calls, for example, and I'm worried by the
> possibility of further drift over time.
> 
> I understand from our face-to-face discussion that you didn't want to
> require the PM infrastructure that disable_nonboot_cpus currently pulls
> in due to the being dependent on CONFIG_PM_SLEEP_SMP which selects
> CONFIG_PM_SLEEP and so on. The solution to that is to refactor the
> Kconfig so we can have disable_nonboot_cpus without all the other PM
> infrastructure.

I switched the current patch to use disable_nonboot_cpus().

> > +
> > +/**
> > + * machine_kexec_prepare - Prepare for a kexec reboot.
> > + *
> > + * Called from the core kexec code when a kernel image is loaded.
> > + */
> > +
> > +int machine_kexec_prepare(struct kimage *image)
> > +{
> > +       int result;
> 
> This seems to always be an error code. Call it 'err'.
> 
> > +       dtb_t *dtb = NULL;
> > +       struct kexec_ctx *ctx;
> > +       const struct kexec_segment *dtb_seg;
> > +
> > +       kexec_image_info(image);
> > +
> > +       result = kexec_check_cpu_die();
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       result = kexec_ctx_alloc(image);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       ctx = kexec_image_to_ctx(image);
> > +
> > +       result = kexec_boot_info_init(&ctx->first, NULL);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       dtb_seg = kexec_find_dtb_seg(image);
> > +
> > +       if (!dtb_seg) {
> > +               result = -EINVAL;
> > +               goto on_error;
> > +       }
> > +
> > +       result = kexec_copy_dtb(dtb_seg, &dtb);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       result = kexec_boot_info_init(&ctx->second, dtb);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       result = kexec_compat_check(ctx);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       kexec_dtb_addr = dtb_seg->mem;
> > +       kexec_kimage_start = image->start;
> > +
> > +       goto on_exit;
> > +
> > +on_error:
> > +       kexec_ctx_clean(image);
> > +on_exit:
> 
> on_* looks weird, and doesn't match the style of other labels in
> arch/arm64. Could we call these 'out_clean' and 'out' instead?
> 
> > +       kfree(dtb);
> > +       return result;
> > +}
> > +
> > +/**
> > + * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
> > + */
> > +
> > +static void kexec_list_flush_cb(void *ctx , unsigned int flag,
> > +       void *addr, void *dest)
> > +{
> > +       switch (flag) {
> > +       case IND_INDIRECTION:
> > +       case IND_SOURCE:
> > +               __flush_dcache_area(addr, PAGE_SIZE);
> 
> Is PAGE_SIZE always big enough? Do we not have a more accurate size?
> Perhaps I've misunderstood what's going on here.

The image list is a list of pages, so PAGE_SIZE should be OK.

> > +               break;
> > +       default:
> > +               break;
> > +       }
> > +}
> > +
> > +/**
> > + * machine_kexec - Do the kexec reboot.
> > + *
> > + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> > + */
> > +
> > +void machine_kexec(struct kimage *image)
> > +{
> > +       phys_addr_t reboot_code_buffer_phys;
> > +       void *reboot_code_buffer;
> > +       struct kexec_ctx *ctx = kexec_image_to_ctx(image);
> > +
> > +       BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
> 
> It looks like relocate_new_kernel_size is a build-time constant. If we
> need that to be less than KEXEC_CONTROL_PAGE_SIZE, then we should make
> that a build-time check.

I moved this check into relocate_new_kernel with a
'.org KEXEC_CONTROL_PAGE_SIZE'.

> > +       BUG_ON(num_online_cpus() > 1);
> > +       BUG_ON(!ctx);
> > +
> > +       kexec_image_info(image);
> > +
> > +       kexec_kimage_head = image->head;
> > +
> > +       reboot_code_buffer_phys = page_to_phys(image->control_code_page);
> > +       reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> > +
> > +       pr_devel("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
> > +               (void *)image->control_code_page);
> 
> This is already a pointer. Is the cast to void necessary?
> 
> > +       pr_devel("%s:%d: reboot_code_buffer_phys:  %p\n", __func__, __LINE__,
> > +               (void *)reboot_code_buffer_phys);
> 
> Use %pa and pass &reboot_code_buffer_phys, no cast necessary.
> 
> > +       pr_devel("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
> > +               reboot_code_buffer);
> > +       pr_devel("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
> > +               relocate_new_kernel);
> > +       pr_devel("%s:%d: relocate_new_kernel_size: %lxh(%lu) bytes\n", __func__,
> > +               __LINE__, relocate_new_kernel_size, relocate_new_kernel_size);
> 
> Please use an '0x' prefix rather than a 'h' suffix. Do we need in print
> in both hex and decimal?
> 
> > +
> > +       pr_devel("%s:%d: kexec_dtb_addr:           %p\n", __func__, __LINE__,
> > +               (void *)kexec_dtb_addr);
> > +       pr_devel("%s:%d: kexec_kimage_head:        %p\n", __func__, __LINE__,
> > +               (void *)kexec_kimage_head);
> > +       pr_devel("%s:%d: kexec_kimage_start:       %p\n", __func__, __LINE__,
> > +               (void *)kexec_kimage_start);
> 
> These are all unsigned long, so why not use the existing mechanism for
> printing unsigned long?
> 
> > +
> > +       /*
> > +        * Copy relocate_new_kernel to the reboot_code_buffer for use
> > +        * after the kernel is shut down.
> > +        */
> > +
> > +       memcpy(reboot_code_buffer, relocate_new_kernel,
> > +               relocate_new_kernel_size);
> > +
> > +       /* Assure reboot_code_buffer is copied. */
> > +
> > +       mb();
> 
> I don't think we need the mb if this is only to guarantee completion
> before the cache flush -- cacheable memory accesses should hazard
> against cache flushes by VA.

OK.

> > +
> > +       pr_info("Bye!\n");
> > +
> > +       local_disable(DAIF_ALL);
> 
> We can move these two right before the soft_restart, after the cache
> maintenance. That way the print is closer to the exit of the current
> kernel.

OK.

> > +
> > +       /* Flush the reboot_code_buffer in preparation for its execution. */
> > +
> > +       __flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
> > +
> > +       /* Flush the kimage list. */
> > +
> > +       kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
> > +
> > +       soft_restart(reboot_code_buffer_phys);
> > +}
> > +
> > +void machine_crash_shutdown(struct pt_regs *regs)
> > +{
> > +       /* Empty routine needed to avoid build errors. */
> > +}
> > diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> > new file mode 100644
> > index 0000000..92aba9d
> > --- /dev/null
> > +++ b/arch/arm64/kernel/relocate_kernel.S
> > @@ -0,0 +1,185 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <asm/assembler.h>
> > +#include <asm/memory.h>
> > +#include <asm/page.h>
> > +
> > +/* The list entry flags. */
> > +
> > +#define IND_DESTINATION_BIT 0
> > +#define IND_INDIRECTION_BIT 1
> > +#define IND_DONE_BIT        2
> > +#define IND_SOURCE_BIT      3
> 
> Given these ned to match the existing IND_* flags in
> include/linux/kexec.h, and they aren't in any way specific to arm64,
> please put these ina an asm-generic header and redefine the existing
> IND_* flags in terms of them.

See my patch that does that:

  https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-August/120368.html
> 
> > +
> > +/*
> > + * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
> > + *
> > + * The memory that the old kernel occupies may be overwritten when copying the
> > + * new kernel to its final location.  To assure that the relocate_new_kernel
> > + * routine which does that copy is not overwritten all code and data needed
> > + * by relocate_new_kernel must be between the symbols relocate_new_kernel and
> > + * relocate_new_kernel_end.  The machine_kexec() routine will copy
> > + * relocate_new_kernel to the kexec control_code_page, a special page which
> > + * has been set up to be preserved during the kernel copy operation.
> > + */
> > +
> > +.align 3
> 
> Surely this isn't necessary?

No, the code should be properly aligned.

> > +
> > +.globl relocate_new_kernel
> > +relocate_new_kernel:
> > +
> > +       /* Setup the list loop variables. */
> > +
> > +       ldr     x10, kexec_kimage_head          /* x10 = list entry */
> 
> Any reason for using x10 rather than starting with x0? Or x18, if you
> need to preserve the low registers?
> 
> > +
> > +       mrs     x0, ctr_el0
> > +       ubfm    x0, x0, #16, #19
> > +       mov     x11, #4
> > +       lsl     x11, x11, x0                    /* x11 = dcache line size */
> 
> Any reason we can't use dcache_line_size, given it's a macro?
> 
> > +
> > +       mov     x12, xzr                        /* x12 = segment start */
> > +       mov     x13, xzr                        /* x13 = entry ptr */
> > +       mov     x14, xzr                        /* x14 = copy dest */
> > +
> > +       /* Check if the new kernel needs relocation. */
> > +
> > +       cbz     x10, .Ldone
> > +       tbnz    x10, IND_DONE_BIT, .Ldone
> > +
> > +.Lloop:
> 
> Is there any reason for the '.L' on all of these? We only seem to do
> that in the lib code that was imported from elsewhere, and it doesn't
> match the rest of the arm64 asm.

.L is a local label prefix in gas.  I don't think it would be good to have
these with larger scope.
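
For example:

.Lloop:					// local: omitted from the symbol table
loop:					// regular label: lands in the symbol table, visible to objdump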

> > +       and     x15, x10, PAGE_MASK             /* x15 = addr */
> > +
> > +       /* Test the entry flags. */
> > +
> > +.Ltest_source:
> > +       tbz     x10, IND_SOURCE_BIT, .Ltest_indirection
> > +
> > +       /* copy_page(x20 = dest, x21 = src) */
> > +
> > +       mov x20, x14
> > +       mov x21, x15
> > +
> > +1:     ldp     x22, x23, [x21]
> > +       ldp     x24, x25, [x21, #16]
> > +       ldp     x26, x27, [x21, #32]
> > +       ldp     x28, x29, [x21, #48]
> > +       add     x21, x21, #64
> > +       stnp    x22, x23, [x20]
> > +       stnp    x24, x25, [x20, #16]
> > +       stnp    x26, x27, [x20, #32]
> > +       stnp    x28, x29, [x20, #48]
> > +       add     x20, x20, #64
> > +       tst     x21, #(PAGE_SIZE - 1)
> > +       b.ne    1b
> 
> It's a shame we can't reuse copy_page directly. Could we not move the
> body to a macro we can reuse here?

copy_page() also does some memory pre-fetch, which Arun said caused
problems on the APM board.  If that board were available to me for
testing I could investigate, but at this time I will put this suggestion
on my todo list.
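
For reference, that refactor would amount to lifting the quoted loop into
a macro both copy_page and relocate_new_kernel could use (temporaries
hard-coded as in the original, prefetch left out):

	.macro	copy_page_body dest, src
1:	ldp	x22, x23, [\src]
	ldp	x24, x25, [\src, #16]
	ldp	x26, x27, [\src, #32]
	ldp	x28, x29, [\src, #48]
	add	\src, \src, #64
	stnp	x22, x23, [\dest]
	stnp	x24, x25, [\dest, #16]
	stnp	x26, x27, [\dest, #32]
	stnp	x28, x29, [\dest, #48]
	add	\dest, \dest, #64
	tst	\src, #(PAGE_SIZE - 1)
	b.ne	1b
	.endm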

> > +
> > +       /* dest += PAGE_SIZE */
> > +
> > +       add     x14, x14, PAGE_SIZE
> > +       b       .Lnext
> > +
> > +.Ltest_indirection:
> > +       tbz     x10, IND_INDIRECTION_BIT, .Ltest_destination
> > +
> > +       /* ptr = addr */
> > +
> > +       mov     x13, x15
> > +       b       .Lnext
> > +
> > +.Ltest_destination:
> > +       tbz     x10, IND_DESTINATION_BIT, .Lnext
> > +
> > +       /* flush segment */
> > +
> > +       bl      .Lflush
> > +       mov     x12, x15
> > +
> > +       /* dest = addr */
> > +
> > +       mov     x14, x15
> > +
> > +.Lnext:
> > +       /* entry = *ptr++ */
> > +
> > +       ldr     x10, [x13]
> > +       add     x13, x13, 8
> 
> This can be:
> 
> 	ldr	x10, [x13], #8
> 
> > +
> > +       /* while (!(entry & DONE)) */
> > +
> > +       tbz     x10, IND_DONE_BIT, .Lloop
> > +
> > +.Ldone:
> > +       /* flush last segment */
> > +
> > +       bl      .Lflush
> > +
> > +       dsb     sy
> > +       isb
> > +       ic      ialluis
> 
> This doesn't look right; we need a dsb and an isb after the instruction
> cache maintenance (or the icache could still be flushing when we branch
> to the new kernel).

OK.

> > +
> > +       /* start_new_kernel */
> > +
> > +       ldr     x4, kexec_kimage_start
> > +       ldr     x0, kexec_dtb_addr
> > +       mov     x1, xzr
> > +       mov     x2, xzr
> > +       mov     x3, xzr
> > +       br      x4
> > +
> > +/* flush - x11 = line size, x12 = start addr, x14 = end addr. */
> > +
> > +.Lflush:
> > +       cbz     x12, 2f
> > +       mov     x0, x12
> > +       sub     x1, x11, #1
> > +       bic     x0, x0, x1
> > +1:     dc      civac, x0
> > +       add     x0, x0, x11
> > +       cmp     x0, x14
> > +       b.lo    1b
> > +2:     ret
> 
> It would be nice if this were earlier in the file, before its callers.

Then we would need to jump over it, which I don't think is
very clean.

> 
> > +
> > +.align 3
> 
> We should have a comment as to why this is needed (to keep the 64-bit
> values below naturally aligned).

I haven't seen such an .align directive comment in any arm64 code yet.

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 11/13] arm64/kexec: Add core kexec support
@ 2014-09-25  0:25       ` Geoff Levand
  0 siblings, 0 replies; 80+ messages in thread
From: Geoff Levand @ 2014-09-25  0:25 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Marc Zyngier, Catalin Marinas, Will Deacon, linux-arm-kernel,
	kexec, christoffer.dall

Hi Mark,

On Thu, 2014-09-18 at 02:13 +0100, Mark Rutland wrote:
> On Tue, Sep 09, 2014 at 11:49:05PM +0100, Geoff Levand wrote:

> > +++ b/arch/arm64/include/asm/kexec.h
> > @@ -0,0 +1,52 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#if !defined(_ARM64_KEXEC_H)
> > +#define _ARM64_KEXEC_H
> > +
> > +/* Maximum physical address we can use pages from */
> > +
> > +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> > +
> > +/* Maximum address we can reach in physical address mode */
> > +
> > +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> > +
> > +/* Maximum address we can use for the control code buffer */
> > +
> > +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> > +
> 
> What are these used for? I see that other architectures seem to do the
> same thing, but they look odd.

They need to be defined for the core kexec code, but arm64
doesn't use them.

> > +#define KEXEC_CONTROL_PAGE_SIZE        4096
> 
> What's this used for?

This is the size reserved for the reboot_code_buffer defined in
kexec's core code.  For arm64, we copy our relocate_new_kernel
routine into the reboot_code_buffer.

> Does this work with 64k pages, and is there any reason we can't figure
> out the actual size of the code (so we don't get bitten if it grows)?

Kexec will reserve pages to satisfy KEXEC_CONTROL_PAGE_SIZE, so for
all arm64 page configs one page will be reserved for this value (4096).

I have a check if relocate_new_kernel is too big
'.org KEXEC_CONTROL_PAGE_SIZE' in the latest implementation.

> > +
> > +#define KEXEC_ARCH KEXEC_ARCH_ARM64
> > +
> > +#define ARCH_HAS_KIMAGE_ARCH
> > +
> > +#if !defined(__ASSEMBLY__)
> > +
> > +struct kimage_arch {
> > +       void *ctx;
> > +};
> > +
> > +/**
> > + * crash_setup_regs() - save registers for the panic kernel
> > + *
> > + * @newregs: registers are saved here
> > + * @oldregs: registers to be saved (may be %NULL)
> > + */
> > +
> > +static inline void crash_setup_regs(struct pt_regs *newregs,
> > +                                   struct pt_regs *oldregs)
> > +{
> > +}
> 
> It would be nice to know what we're going to do for this.
> 
> Is this a required function, or can we get away without crash kernel
> support for the moment?

This is just to avoid a build error.  It is not used for kexec re-boot.
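
For comparison, a sketch of what a real implementation might look
like, loosely based on the arm version (not part of this series):

  static inline void crash_setup_regs(struct pt_regs *newregs,
                                      struct pt_regs *oldregs)
  {
          if (oldregs)
                  memcpy(newregs, oldregs, sizeof(*newregs));
          /* else: capture the current register state into newregs,
           * typically with inline asm. */
  }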

> > +
> > +#endif /* !defined(__ASSEMBLY__) */
> > +
> > +#endif
> > diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> > index df7ef87..8b7c029 100644
> > --- a/arch/arm64/kernel/Makefile
> > +++ b/arch/arm64/kernel/Makefile
> > @@ -29,6 +29,8 @@ arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
> >  arm64-obj-$(CONFIG_JUMP_LABEL)         += jump_label.o
> >  arm64-obj-$(CONFIG_KGDB)               += kgdb.o
> >  arm64-obj-$(CONFIG_EFI)                        += efi.o efi-stub.o efi-entry.o
> > +arm64-obj-$(CONFIG_KEXEC)              += machine_kexec.o relocate_kernel.o    \
> > +                                          cpu-properties.o
> >
> >  obj-y                                  += $(arm64-obj-y) vdso/
> >  obj-m                                  += $(arm64-obj-m)
> > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> > new file mode 100644
> > index 0000000..043a3bc
> > --- /dev/null
> > +++ b/arch/arm64/kernel/machine_kexec.c
> > @@ -0,0 +1,612 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/kexec.h>
> > +#include <linux/of_fdt.h>
> > +#include <linux/slab.h>
> > +#include <linux/uaccess.h>
> > +
> > +#include <asm/cacheflush.h>
> > +#include <asm/cpu_ops.h>
> > +#include <asm/system_misc.h>
> > +
> > +#include "cpu-properties.h"
> > +
> > +#if defined(DEBUG)
> > +static const int debug = 1;
> > +#else
> > +static const int debug;
> > +#endif
> 
> I don't think we need this.

I moved the debug output into a separate patch; I'll decide later
whether to post it.

> > +
> > +typedef struct dtb_buffer {char b[0]; } dtb_t;
> 
> It would be nice for this to be consistent with other dtb uses; if we
> need a dtb type then it shouldn't be specific to kexec.

This was to avoid errors due to the lack of type checking with
void* types.  I've reworked this in the latest patch.

> > +static struct kexec_ctx *current_ctx;
> > +
> > +static int kexec_ctx_alloc(struct kimage *image)
> > +{
> > +       BUG_ON(image->arch.ctx);
> > +
> > +       image->arch.ctx = kmalloc(sizeof(struct kexec_ctx), GFP_KERNEL);
> > +
> > +       if (!image->arch.ctx)
> > +               return -ENOMEM;
> > +
> > +       current_ctx = (struct kexec_ctx *)image->arch.ctx;
> 
> This seems to be the only use of current_ctx. I take it this is a
> leftover from debugging?
> 
> [...]
> 
> > +/**
> > + * kexec_list_walk - Helper to walk the kimage page list.
> > + */
> 
> Please keep this associated with the function it refers to (nothing
> should be between this comment and the function prototype).
> 
> > +
> > +#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
> 
> Can't this live in include/linux/kexec.h, where these flags are defined.

I have submitted a kexec patch to clean this up.  I'll refactor this
code once that patch is upstream.

  https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-August/120368.html

> The meaning of these doesn't seem to be documented anywhere. Would you
> be able to explain what each of these means?

I think lack of comments/documentation is a general weakness of the
core kexec code.

> > +static void kexec_list_walk(void *ctx, unsigned long kimage_head,
> > +       void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
> > +{
> > +       void *dest;
> > +       unsigned long *entry;
> > +
> > +       for (entry = &kimage_head, dest = NULL; ; entry++) {
> > +               unsigned int flag = *entry & IND_FLAGS;
> > +               void *addr = phys_to_virt(*entry & PAGE_MASK);
> > +
> > +               switch (flag) {
> > +               case IND_INDIRECTION:
> > +                       entry = (unsigned long *)addr - 1;
> > +                       cb(ctx, flag, addr, NULL);
> > +                       break;
> > +               case IND_DESTINATION:
> > +                       dest = addr;
> > +                       cb(ctx, flag, addr, NULL);
> > +                       break;
> > +               case IND_SOURCE:
> > +                       cb(ctx, flag, addr, dest);
> > +                       dest += PAGE_SIZE;
> 
> I really don't understand what's going on with dest here, but that's
> probably because I don't understand the meaning of the flags.

IND_SOURCE means the entry is a source page of the current segment,
and dest is the destination address that page will be copied to.  Each
time a new source page is processed the destination is post
incremented by one page.  Think foo(src, dest++).
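
As a sketch of those semantics (illustrative only; indirection
handling and error checking omitted, assumes <linux/kexec.h> and
<asm/page.h>):

  static void list_sketch(unsigned long *entry)
  {
          void *dest = NULL;

          for (; !(*entry & IND_DONE); entry++) {
                  void *addr = phys_to_virt(*entry & PAGE_MASK);

                  if (*entry & IND_DESTINATION)
                          dest = addr;            /* segment start */
                  else if (*entry & IND_SOURCE) {
                          copy_page(dest, addr);  /* foo(src, dest++) */
                          dest += PAGE_SIZE;
                  }
          }
  }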

> > +                       break;
> > +               case IND_DONE:
> > +                       cb(ctx, flag , NULL, NULL);
> > +                       return;
> > +               default:
> > +                       pr_devel("%s:%d unknown flag %xh\n", __func__, __LINE__,
> > +                               flag);
> 
> Wouldn't pr_warn would be more appropriate here?

We don't really need a message since the IND_ flags are well
established.  I'll remove this.

> 
> > +                       cb(ctx, flag, addr, NULL);
> > +                       break;
> > +               }
> > +       }
> > +}
> > +
> > +/**
> > + * kexec_image_info - For debugging output.
> > + */
> > +
> > +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
> > +static void _kexec_image_info(const char *func, int line,
> > +       const struct kimage *image)
> > +{
> > +       if (debug) {
> > +               unsigned long i;
> > +
> > +               pr_devel("%s:%d:\n", func, line);
> > +               pr_devel("  kexec image info:\n");
> > +               pr_devel("    type:        %d\n", image->type);
> > +               pr_devel("    start:       %lx\n", image->start);
> > +               pr_devel("    head:        %lx\n", image->head);
> > +               pr_devel("    nr_segments: %lu\n", image->nr_segments);
> > +
> > +               for (i = 0; i < image->nr_segments; i++) {
> > +                       pr_devel("      segment[%lu]: %016lx - %016lx, "
> > +                               "%lxh bytes, %lu pages\n",
> > +                               i,
> > +                               image->segment[i].mem,
> > +                               image->segment[i].mem + image->segment[i].memsz,
> > +                               image->segment[i].memsz,
> > +                               image->segment[i].memsz /  PAGE_SIZE);
> > +
> > +                       if (kexec_is_dtb_user(image->segment[i].buf))
> > +                               pr_devel("        dtb segment\n");
> > +               }
> > +       }
> > +}
> 
> pr_devel is already dependent on DEBUG, so surely we don't need to check
> the debug variable?

I'm not sure how much of this would be removed as dead code.  If
the compiler is clever enough it all should be.
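
For reference, pr_devel() is defined along these lines in
include/linux/printk.h, so with DEBUG unset each call becomes a
no_printk() that the compiler can discard, and the loop around it
should then be eliminated as well:

  #if defined(DEBUG)
  #define pr_devel(fmt, ...) \
          printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
  #else
  #define pr_devel(fmt, ...) \
          no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
  #endif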

> > +
> > +/**
> > + * kexec_find_dtb_seg - Helper routine to find the dtb segment.
> > + */
> > +
> > +static const struct kexec_segment *kexec_find_dtb_seg(
> > +       const struct kimage *image)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < image->nr_segments; i++) {
> > +               if (kexec_is_dtb_user(image->segment[i].buf))
> > +                       return &image->segment[i];
> > +       }
> > +
> > +       return NULL;
> > +}
> 
> I'm really not keen on having the kernel guess which blobs need special
> treatment, though we seem to do that for arm.

Yes, to pass the dtb in x0 when the new kernel is entered.

> It would be far nicer if we could pass flags for each segment to
> describe what it is (e.g. kernel image, dtb, other binary blob), 

Well, we do pass a flag of sorts, the DT magic value.
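
The check is of roughly this shape (an illustrative sketch, not the
exact helper from this series; kexec_is_dtb_user() presumably does the
same via get_user() on the user-supplied buffer):

  #include <linux/of_fdt.h>

  /* Sketch: treat a segment as a dtb if it starts with the fdt magic. */
  static bool kexec_is_dtb(__be32 first_word)
  {
          return be32_to_cpu(first_word) == OF_DT_HEADER;
  }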

> so we
> can do things like pass multiple DTBs (so we load two kernels at once
> and pass each a unique DTB if we want to boot a new kernel + crashkernel
> pair). Unfortunately that would require some fairly invasive rework of
> the kexec core.

I don't think I'll attempt that any time soon.  Feel free to
give it a try.

> For secureboot we can't trust a dtb from userspace, and will have to use
> kexec_file_load. To work with that we can either:
> 
> * Reuse the original DTB, patched with the new command line. This may
>   have statefulness issues (for things like simplefb).
> 
> * Build a new DTB by flattening the current live tree. This would rely
>   on drivers that modify state to patch the tree appropriately.

I have not yet looked into how to do this.

> [...]
> 
> > +/**
> > + * kexec_cpu_info_init - Initialize an array of kexec_cpu_info structures.
> > + *
> > + * Allocates a cpu info array and fills it with info for all cpus found in
> > + * the device tree passed.
> > + */
> > +
> > +static int kexec_cpu_info_init(const struct device_node *dn,
> > +       struct kexec_boot_info *info)
> > +{
> > +       int result;
> > +       unsigned int cpu;
> > +
> > +       info->cp = kmalloc(
> > +               info->cpu_count * sizeof(*info->cp), GFP_KERNEL);
> > +
> > +       if (!info->cp) {
> > +               pr_err("%s: Error: Out of memory.", __func__);
> > +               return -ENOMEM;
> > +       }
> > +
> > +       for (cpu = 0; cpu < info->cpu_count; cpu++) {
> > +               struct cpu_properties *cp = &info->cp[cpu];
> > +
> > +               dn = of_find_node_by_type((struct device_node *)dn, "cpu");
> > +
> > +               if (!dn) {
> > +                       pr_devel("%s:%d: bad node\n", __func__, __LINE__);
> > +                       goto on_error;
> > +               }
> > +
> > +               result = read_cpu_properties(cp, dn);
> > +
> > +               if (result) {
> > +                       pr_devel("%s:%d: bad node\n", __func__, __LINE__);
> > +                       goto on_error;
> > +               }
> > +
> > +               if (cp->type == cpu_enable_method_psci)
> > +                       pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s'\n",
> > +                               __func__, __LINE__, cpu, cp->hwid,
> > +                               cp->enable_method);
> > +               else
> > +                       pr_devel("%s:%d: cpu-%u: hwid-%llx, '%s', "
> > +                               "cpu-release-addr %llx\n",
> > +                               __func__, __LINE__, cpu, cp->hwid,
> > +                               cp->enable_method,
> > +                               cp->cpu_release_addr);
> > +       }
> > +
> > +       return 0;
> > +
> > +on_error:
> > +       kfree(info->cp);
> > +       info->cp = NULL;
> > +       return -EINVAL;
> > +}
> 
> I don't see why we should need this at all. If we use the hotplug
> infrastructure, we don't need access to the enable-method and related
> properties, and the kexec code need only deal with a single CPU.

I removed all the checking in the latest patch.

> The only case where kexec needs to deal with other CPUs is when some are
> sat in the holding pen, but this code doesn't seem to handle that.
> 
> as I believe I mentioned before, we should be able to extend the holding
> pen code to get those CPUs to increment a sat-in-pen counter and if
> that's non-zero after SMP bringup we print a warning (and disallow
> kexec).

I have some work-in-progress patches that try to do this, but I will not
include those in this series.  See my spin-table branch:

  https://git.linaro.org/people/geoff.levand/linux-kexec.git

> > +/**
> > +* kexec_compat_check - Iterator for kexec_cpu_check.
> > +*/
> > +
> > +static int kexec_compat_check(const struct kexec_ctx *ctx)
> > +{
> > +       unsigned int cpu_1;
> > +       unsigned int to_process;
> > +
> > +       to_process = min(ctx->first.cpu_count, ctx->second.cpu_count);
> > +
> > +       if (ctx->first.cpu_count != ctx->second.cpu_count)
> > +               pr_warn("%s: Warning: CPU count mismatch %u != %u.\n",
> > +                       __func__, ctx->first.cpu_count, ctx->second.cpu_count);
> > +
> > +       for (cpu_1 = 0; cpu_1 < ctx->first.cpu_count; cpu_1++) {
> > +               unsigned int cpu_2;
> > +               struct cpu_properties *cp_1 = &ctx->first.cp[cpu_1];
> > +
> > +               for (cpu_2 = 0; cpu_2 < ctx->second.cpu_count; cpu_2++) {
> > +                       struct cpu_properties *cp_2 = &ctx->second.cp[cpu_2];
> > +
> > +                       if (cp_1->hwid != cp_2->hwid)
> > +                               continue;
> > +
> > +                       if (!kexec_cpu_check(cp_1, cp_2))
> > +                               return -EINVAL;
> > +
> > +                       to_process--;
> > +               }
> > +       }
> > +
> > +       if (to_process) {
> > +               pr_warn("%s: Warning: Failed to process %u CPUs.\n", __func__,
> > +                       to_process);
> > +               return -EINVAL;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> I don't see the point in checking this in the kernel. If I pass the
> second stage kernel a new dtb where my enable methods are different,
> that was my choice as a user. If that doesn't work, that's my fault.
> 
> There are plenty of other things that might be completely different that
> we don't sanity check, so I don't see why enable methods should be any
> different.
> 
> [...]
> 
> > +/**
> > + * kexec_check_cpu_die -  Check if cpu_die() will work on secondary processors.
> > + */
> > +
> > +static int kexec_check_cpu_die(void)
> > +{
> > +       unsigned int cpu;
> > +       unsigned int sum = 0;
> > +
> > +       /* For simplicity this also checks the primary CPU. */
> > +
> > +       for_each_cpu(cpu, cpu_all_mask) {
> > +               if (cpu && (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_disable ||
> > +                       cpu_ops[cpu]->cpu_disable(cpu))) {
> > +                       sum++;
> > +                       pr_err("%s: Error: "
> > +                               "CPU %u does not support hot un-plug.\n",
> > +                               __func__, cpu);
> > +               }
> > +       }
> > +
> > +       return sum ? -EOPNOTSUPP : 0;
> > +}
> 
> We should really use disable_nonboot_cpus() for this. That way we don't
> end up with a slightly different hotplug implementation for kexec. The
> above is missing cpu_kill calls, for example, and I'm worried by the
> possibility of further drift over time.
> 
> I understand from our face-to-face discussion that you didn't want to
> require the PM infrastructure that disable_nonboot_cpus currently pulls
> in due to the being dependent on CONFIG_PM_SLEEP_SMP which selects
> CONFIG_PM_SLEEP and so on. The solution to that is to refactor the
> Kconfig so we can have disable_nonboot_cpus without all the other PM
> infrastructure.

I switched the current patch to use disable_nonboot_cpus().
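
Roughly like this (a sketch of the new call site; exact placement is
in the updated patch):

  #include <linux/cpu.h>

  void machine_shutdown(void)
  {
          /* Take the secondaries down via the common hotplug path
           * (cpu_disable/cpu_kill) instead of open-coding it. */
          disable_nonboot_cpus();
  }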

> > +
> > +/**
> > + * machine_kexec_prepare - Prepare for a kexec reboot.
> > + *
> > + * Called from the core kexec code when a kernel image is loaded.
> > + */
> > +
> > +int machine_kexec_prepare(struct kimage *image)
> > +{
> > +       int result;
> 
> This seems to always be an error code. Call it 'err'.
> 
> > +       dtb_t *dtb = NULL;
> > +       struct kexec_ctx *ctx;
> > +       const struct kexec_segment *dtb_seg;
> > +
> > +       kexec_image_info(image);
> > +
> > +       result = kexec_check_cpu_die();
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       result = kexec_ctx_alloc(image);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       ctx = kexec_image_to_ctx(image);
> > +
> > +       result = kexec_boot_info_init(&ctx->first, NULL);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       dtb_seg = kexec_find_dtb_seg(image);
> > +
> > +       if (!dtb_seg) {
> > +               result = -EINVAL;
> > +               goto on_error;
> > +       }
> > +
> > +       result = kexec_copy_dtb(dtb_seg, &dtb);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       result = kexec_boot_info_init(&ctx->second, dtb);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       result = kexec_compat_check(ctx);
> > +
> > +       if (result)
> > +               goto on_error;
> > +
> > +       kexec_dtb_addr = dtb_seg->mem;
> > +       kexec_kimage_start = image->start;
> > +
> > +       goto on_exit;
> > +
> > +on_error:
> > +       kexec_ctx_clean(image);
> > +on_exit:
> 
> on_* looks weird, and doesn't match the style of other labels in
> arch/arm64. Could we call these 'out_clean' and 'out' instead?
> 
> > +       kfree(dtb);
> > +       return result;
> > +}
> > +
> > +/**
> > + * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
> > + */
> > +
> > +static void kexec_list_flush_cb(void *ctx , unsigned int flag,
> > +       void *addr, void *dest)
> > +{
> > +       switch (flag) {
> > +       case IND_INDIRECTION:
> > +       case IND_SOURCE:
> > +               __flush_dcache_area(addr, PAGE_SIZE);
> 
> Is PAGE_SIZE always big enough? Do we not have a more accurate size?
> Perhaps I've misunderstood what's going on here.

The image list is a list of pages, so PAGE_SIZE should be OK.

> > +               break;
> > +       default:
> > +               break;
> > +       }
> > +}
> > +
> > +/**
> > + * machine_kexec - Do the kexec reboot.
> > + *
> > + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> > + */
> > +
> > +void machine_kexec(struct kimage *image)
> > +{
> > +       phys_addr_t reboot_code_buffer_phys;
> > +       void *reboot_code_buffer;
> > +       struct kexec_ctx *ctx = kexec_image_to_ctx(image);
> > +
> > +       BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
> 
> It looks like relocate_new_kernel_size is a build-time constant. If we
> need that to be less than KEXEC_CONTROL_PAGE_SIZE, then we should make
> that a build-time check.

I moved this check into relocate_new_kernel with a
'.org KEXEC_CONTROL_PAGE_SIZE'.

> > +       BUG_ON(num_online_cpus() > 1);
> > +       BUG_ON(!ctx);
> > +
> > +       kexec_image_info(image);
> > +
> > +       kexec_kimage_head = image->head;
> > +
> > +       reboot_code_buffer_phys = page_to_phys(image->control_code_page);
> > +       reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> > +
> > +       pr_devel("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
> > +               (void *)image->control_code_page);
> 
> This is already a pointer. Is the cast to void necessary?
> 
> > +       pr_devel("%s:%d: reboot_code_buffer_phys:  %p\n", __func__, __LINE__,
> > +               (void *)reboot_code_buffer_phys);
> 
> Use %pa and pass &reboot_code_buffer_phys, no cast necessary.
> 
> > +       pr_devel("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
> > +               reboot_code_buffer);
> > +       pr_devel("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
> > +               relocate_new_kernel);
> > +       pr_devel("%s:%d: relocate_new_kernel_size: %lxh(%lu) bytes\n", __func__,
> > +               __LINE__, relocate_new_kernel_size, relocate_new_kernel_size);
> 
> Please use an '0x' prefix rather than a 'h' suffix. Do we need in print
> in both hex and decimal?
> 
> > +
> > +       pr_devel("%s:%d: kexec_dtb_addr:           %p\n", __func__, __LINE__,
> > +               (void *)kexec_dtb_addr);
> > +       pr_devel("%s:%d: kexec_kimage_head:        %p\n", __func__, __LINE__,
> > +               (void *)kexec_kimage_head);
> > +       pr_devel("%s:%d: kexec_kimage_start:       %p\n", __func__, __LINE__,
> > +               (void *)kexec_kimage_start);
> 
> These are all unsigned long, so why not use the existing mechanism for
> printing unsigned long?
> 
> > +
> > +       /*
> > +        * Copy relocate_new_kernel to the reboot_code_buffer for use
> > +        * after the kernel is shut down.
> > +        */
> > +
> > +       memcpy(reboot_code_buffer, relocate_new_kernel,
> > +               relocate_new_kernel_size);
> > +
> > +       /* Assure reboot_code_buffer is copied. */
> > +
> > +       mb();
> 
> I don't think we need the mb if this is only to guarantee completion
> before the cache flush -- cacheable memory accesses should hazard
> against cache flushes by VA.

OK.

> > +
> > +       pr_info("Bye!\n");
> > +
> > +       local_disable(DAIF_ALL);
> 
> We can move these two right before the soft_restart, after the cache
> maintenance. That way the print is closer to the exit of the current
> kernel.

OK.

> > +
> > +       /* Flush the reboot_code_buffer in preparation for its execution. */
> > +
> > +       __flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
> > +
> > +       /* Flush the kimage list. */
> > +
> > +       kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
> > +
> > +       soft_restart(reboot_code_buffer_phys);
> > +}
> > +
> > +void machine_crash_shutdown(struct pt_regs *regs)
> > +{
> > +       /* Empty routine needed to avoid build errors. */
> > +}
> > diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> > new file mode 100644
> > index 0000000..92aba9d
> > --- /dev/null
> > +++ b/arch/arm64/kernel/relocate_kernel.S
> > @@ -0,0 +1,185 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <asm/assembler.h>
> > +#include <asm/memory.h>
> > +#include <asm/page.h>
> > +
> > +/* The list entry flags. */
> > +
> > +#define IND_DESTINATION_BIT 0
> > +#define IND_INDIRECTION_BIT 1
> > +#define IND_DONE_BIT        2
> > +#define IND_SOURCE_BIT      3
> 
> Given these ned to match the existing IND_* flags in
> include/linux/kexec.h, and they aren't in any way specific to arm64,
> please put these ina an asm-generic header and redefine the existing
> IND_* flags in terms of them.

See my patch that does that:

  https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-August/120368.html
> 
> > +
> > +/*
> > + * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
> > + *
> > + * The memory that the old kernel occupies may be overwritten when coping the
> > + * new kernel to its final location.  To assure that the relocate_new_kernel
> > + * routine which does that copy is not overwritten all code and data needed
> > + * by relocate_new_kernel must be between the symbols relocate_new_kernel and
> > + * relocate_new_kernel_end.  The machine_kexec() routine will copy
> > + * relocate_new_kernel to the kexec control_code_page, a special page which
> > + * has been set up to be preserved during the kernel copy operation.
> > + */
> > +
> > +.align 3
> 
> Surely this isn't necessary?

It is necessary; the code should be properly aligned.

> > +
> > +.globl relocate_new_kernel
> > +relocate_new_kernel:
> > +
> > +       /* Setup the list loop variables. */
> > +
> > +       ldr     x10, kexec_kimage_head          /* x10 = list entry */
> 
> Any reason for using x10 rather than starting with x0? Or x18, if you
> need to preserve the low registers?
> 
> > +
> > +       mrs     x0, ctr_el0
> > +       ubfm    x0, x0, #16, #19
> > +       mov     x11, #4
> > +       lsl     x11, x11, x0                    /* x11 = dcache line size */
> 
> Any reason we can't use dcache_line_size, given it's a macro?
> 
> > +
> > +       mov     x12, xzr                        /* x12 = segment start */
> > +       mov     x13, xzr                        /* x13 = entry ptr */
> > +       mov     x14, xzr                        /* x14 = copy dest */
> > +
> > +       /* Check if the new kernel needs relocation. */
> > +
> > +       cbz     x10, .Ldone
> > +       tbnz    x10, IND_DONE_BIT, .Ldone
> > +
> > +.Lloop:
> 
> Is there any reason for the '.L' on all of these? We only seem to do
> that in the lib code that was imported from elsewhere, and it doesn't
> match the rest of the arm64 asm.

.L is a local label prefix in gas.  I don't think it would be good to have
these with larger scope.

> > +       and     x15, x10, PAGE_MASK             /* x15 = addr */
> > +
> > +       /* Test the entry flags. */
> > +
> > +.Ltest_source:
> > +       tbz     x10, IND_SOURCE_BIT, .Ltest_indirection
> > +
> > +       /* copy_page(x20 = dest, x21 = src) */
> > +
> > +       mov x20, x14
> > +       mov x21, x15
> > +
> > +1:     ldp     x22, x23, [x21]
> > +       ldp     x24, x25, [x21, #16]
> > +       ldp     x26, x27, [x21, #32]
> > +       ldp     x28, x29, [x21, #48]
> > +       add     x21, x21, #64
> > +       stnp    x22, x23, [x20]
> > +       stnp    x24, x25, [x20, #16]
> > +       stnp    x26, x27, [x20, #32]
> > +       stnp    x28, x29, [x20, #48]
> > +       add     x20, x20, #64
> > +       tst     x21, #(PAGE_SIZE - 1)
> > +       b.ne    1b
> 
> It's a shame we can't reuse copy_page directly. Could we not move the
> body to a macro we can reuse here?

copy_page() also does some memory pre-fetch, which Arun said caused
problems on the APM board.  If that board were available to me for
testing I could investigate, but at this time I will put this suggestion
on my todo list.

> > +
> > +       /* dest += PAGE_SIZE */
> > +
> > +       add     x14, x14, PAGE_SIZE
> > +       b       .Lnext
> > +
> > +.Ltest_indirection:
> > +       tbz     x10, IND_INDIRECTION_BIT, .Ltest_destination
> > +
> > +       /* ptr = addr */
> > +
> > +       mov     x13, x15
> > +       b       .Lnext
> > +
> > +.Ltest_destination:
> > +       tbz     x10, IND_DESTINATION_BIT, .Lnext
> > +
> > +       /* flush segment */
> > +
> > +       bl      .Lflush
> > +       mov     x12, x15
> > +
> > +       /* dest = addr */
> > +
> > +       mov     x14, x15
> > +
> > +.Lnext:
> > +       /* entry = *ptr++ */
> > +
> > +       ldr     x10, [x13]
> > +       add     x13, x13, 8
> 
> This can be:
> 
> 	ldr	x10, [x13], #8
> 
> > +
> > +       /* while (!(entry & DONE)) */
> > +
> > +       tbz     x10, IND_DONE_BIT, .Lloop
> > +
> > +.Ldone:
> > +       /* flush last segment */
> > +
> > +       bl      .Lflush
> > +
> > +       dsb     sy
> > +       isb
> > +       ic      ialluis
> 
> This doesn't look right; we need a dsb and an isb after the instruction
> cache maintenance (or the icache could still be flushing when we branch
> to the new kernel).

OK.

> > +
> > +       /* start_new_kernel */
> > +
> > +       ldr     x4, kexec_kimage_start
> > +       ldr     x0, kexec_dtb_addr
> > +       mov     x1, xzr
> > +       mov     x2, xzr
> > +       mov     x3, xzr
> > +       br      x4
> > +
> > +/* flush - x11 = line size, x12 = start addr, x14 = end addr. */
> > +
> > +.Lflush:
> > +       cbz     x12, 2f
> > +       mov     x0, x12
> > +       sub     x1, x11, #1
> > +       bic     x0, x0, x1
> > +1:     dc      civac, x0
> > +       add     x0, x0, x11
> > +       cmp     x0, x14
> > +       b.lo    1b
> > +2:     ret
> 
> It would be nice if this were earlier in the file, before its callers.

Then we would need to jump over it, which I don't think is
very clean.

> 
> > +
> > +.align 3
> 
> We should have a comment as to why this is needed (to keep the 64-bit
> values below naturally aligned).

I haven't seen such an .align directive comment in any arm64 code yet.

-Geoff

^ permalink raw reply	[flat|nested] 80+ messages in thread

end of thread, other threads:[~2014-09-25  0:25 UTC | newest]

Thread overview: 80+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-09 22:51 [PATCH 00/13] arm64 kexec kernel patches V2 Geoff Levand
2014-09-09 22:51 ` Geoff Levand
2014-09-09 22:49 ` [PATCH 04/13] arm64: Add new hcall HVC_CALL_FUNC Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-10 17:07   ` Will Deacon
2014-09-10 17:07     ` Will Deacon
2014-09-10 17:23     ` Geoff Levand
2014-09-10 17:23       ` Geoff Levand
2014-09-10 17:35       ` Will Deacon
2014-09-10 17:35         ` Will Deacon
2014-09-10 18:11   ` [PATCH V2 " Geoff Levand
2014-09-10 18:11     ` Geoff Levand
2014-09-15 18:11   ` [PATCH " Mark Rutland
2014-09-15 18:11     ` Mark Rutland
2014-09-25  0:24     ` Geoff Levand
2014-09-25  0:24       ` Geoff Levand
2014-09-09 22:49 ` [PATCH 02/13] arm64/kvm: Fix assembler compatibility of macros Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-10  8:40   ` Ard Biesheuvel
2014-09-10  8:40     ` Ard Biesheuvel
2014-09-10 16:35     ` Geoff Levand
2014-09-10 16:35       ` Geoff Levand
2014-09-10 17:09       ` Ard Biesheuvel
2014-09-10 17:09         ` Ard Biesheuvel
2014-09-15 16:14         ` Mark Rutland
2014-09-15 16:14           ` Mark Rutland
2014-09-10 18:04   ` [PATCH V2 " Geoff Levand
2014-09-10 18:04     ` Geoff Levand
2014-09-09 22:49 ` [PATCH 03/13] arm64: Convert hcalls to use ISS field Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-11 16:14   ` Arun Chandran
2014-09-11 16:14     ` Arun Chandran
2014-09-15 17:57   ` Mark Rutland
2014-09-15 17:57     ` Mark Rutland
2014-09-22 21:46     ` Geoff Levand
2014-09-22 21:46       ` Geoff Levand
2014-09-09 22:49 ` [PATCH 06/13] arm64: Add new routine read_cpu_properties Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-15 18:42   ` Mark Rutland
2014-09-15 18:42     ` Mark Rutland
2014-09-25  0:23     ` Geoff Levand
2014-09-25  0:23       ` Geoff Levand
2014-09-09 22:49 ` [PATCH 05/13] arm64: Add EL2 switch to soft_restart Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-09 22:49 ` [PATCH 01/13] arm64: Add ESR_EL2_EC macros to hyp-stub Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-15 16:10   ` Mark Rutland
2014-09-15 16:10     ` Mark Rutland
2014-09-22 21:45     ` Geoff Levand
2014-09-22 21:45       ` Geoff Levand
2014-09-09 22:49 ` [PATCH 07/13] arm64: Add new routine local_disable Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-15 18:56   ` Mark Rutland
2014-09-15 18:56     ` Mark Rutland
2014-09-25  0:24     ` Geoff Levand
2014-09-25  0:24       ` Geoff Levand
2014-09-09 22:49 ` [PATCH 10/13] arm64/kexec: Revert change to machine_shutdown() Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-15 19:20   ` Mark Rutland
2014-09-15 19:20     ` Mark Rutland
2014-09-09 22:49 ` [PATCH 11/13] arm64/kexec: Add core kexec support Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-18  1:13   ` Mark Rutland
2014-09-18  1:13     ` Mark Rutland
2014-09-25  0:25     ` Geoff Levand
2014-09-25  0:25       ` Geoff Levand
2014-09-09 22:49 ` [PATCH 13/13] arm64/kexec: Add kexec_ignore_compat_check param Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-09 22:49 ` [PATCH 12/13] arm64/kexec: Enable kexec in the arm64 defconfig Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-09 22:49 ` [PATCH 09/13] arm64/kexec: Kexec expects cpu_die Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-15 19:10   ` Mark Rutland
2014-09-15 19:10     ` Mark Rutland
2014-09-09 22:49 ` [PATCH 08/13] arm64: Use cpu_ops for smp_stop Geoff Levand
2014-09-09 22:49   ` Geoff Levand
2014-09-15 19:06   ` Mark Rutland
2014-09-15 19:06     ` Mark Rutland
2014-09-25  0:24     ` Geoff Levand
2014-09-25  0:24       ` Geoff Levand

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.