linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 4/7] arm64: Add EL2 switch to soft_restart
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
  2014-09-25  0:23 ` [PATCH 3/7] arm64: Add new hcall HVC_CALL_FUNC Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-09-25  0:23 ` [PATCH 2/7] arm64: Convert hcalls to use ISS field Geoff Levand
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

When a CPU is reset it needs to be put into the exception level it had when it
entered the kernel.  Update cpu_reset() to accept an argument el2_switch which
signals cpu_reset() to enter the soft reset address at EL2.  If el2_switch is
not set the soft reset address will be entered at EL1.

Update cpu_soft_restart() and soft_restart() to pass the return of
is_hyp_mode_available() as the el2_switch value to cpu_reset().  Also update the
comments of cpu_reset(), cpu_soft_restart() and soft_restart() to reflect this
change.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/proc-fns.h |  4 ++--
 arch/arm64/kernel/process.c       |  6 ++++-
 arch/arm64/mm/proc.S              | 47 +++++++++++++++++++++++++++++----------
 3 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 9a8fd84..339394d 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -32,8 +32,8 @@ extern void cpu_cache_off(void);
 extern void cpu_do_idle(void);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
-void cpu_soft_restart(phys_addr_t cpu_reset,
-		unsigned long addr) __attribute__((noreturn));
+void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long el2_switch,
+		      unsigned long addr) __attribute__((noreturn));
 extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
 extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index bf66922..0a3414b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -50,6 +50,7 @@
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
+#include <asm/virt.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -60,7 +61,10 @@ EXPORT_SYMBOL(__stack_chk_guard);
 void soft_restart(unsigned long addr)
 {
 	setup_mm_for_reboot();
-	cpu_soft_restart(virt_to_phys(cpu_reset), addr);
+
+	cpu_soft_restart(virt_to_phys(cpu_reset), is_hyp_mode_available(),
+			 addr);
+
 	/* Should never get here */
 	BUG();
 }
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 4e778b1..7467199 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/virt.h>
 
 #include "proc-macros.S"
 
@@ -59,27 +60,48 @@ ENTRY(cpu_cache_off)
 ENDPROC(cpu_cache_off)
 
 /*
- *	cpu_reset(loc)
+ * cpu_reset(el2_switch, loc) - Helper for cpu_soft_restart.
  *
- *	Perform a soft reset of the system.  Put the CPU into the same state
- *	as it would be if it had been reset, and branch to what would be the
- *	reset vector. It must be executed with the flat identity mapping.
+ * @cpu_reset: Physical address of the cpu_reset routine.
+ * @el2_switch: Flag to indicate a switch to EL2 is needed.
+ * @addr: Location to jump to for soft reset.
  *
- *	- loc   - location to jump to for soft reset
+ * Put the CPU into the same state as it would be if it had been reset, and
+ * branch to what would be the reset vector. It must be executed with the
+ * flat identity mapping.
  */
+
 	.align	5
+
 ENTRY(cpu_reset)
-	mrs	x1, sctlr_el1
-	bic	x1, x1, #1
-	msr	sctlr_el1, x1			// disable the MMU
+	mrs	x2, sctlr_el1
+	bic	x2, x2, #1
+	msr	sctlr_el1, x2			// disable the MMU
 	isb
-	ret	x0
+
+	cbz	x0, 1f				// el2_switch?
+	mov	x0, x1
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	hvc	#HVC_CALL_FUNC			// no return
+
+1:	ret	x1
 ENDPROC(cpu_reset)
 
+/*
+ * cpu_soft_restart(cpu_reset, el2_switch, addr) - Perform a cpu soft reset.
+ *
+ * @cpu_reset: Physical address of the cpu_reset routine.
+ * @el2_switch: Flag to indicate a switch to EL2 is needed, passed to cpu_reset.
+ * @addr: Location to jump to for soft reset, passed to cpu_reset.
+ *
+ */
+
 ENTRY(cpu_soft_restart)
-	/* Save address of cpu_reset() and reset address */
-	mov	x19, x0
-	mov	x20, x1
+	mov	x19, x0				// cpu_reset
+	mov	x20, x1				// el2_switch
+	mov	x21, x2				// addr
 
 	/* Turn D-cache off */
 	bl	cpu_cache_off
@@ -88,6 +110,7 @@ ENTRY(cpu_soft_restart)
 	bl	flush_cache_all
 
 	mov	x0, x20
+	mov	x1, x21
 	ret	x19
 ENDPROC(cpu_soft_restart)
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (2 preceding siblings ...)
  2014-09-25  0:23 ` [PATCH 2/7] arm64: Convert hcalls to use ISS field Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-10-03 10:26   ` Mark Rutland
  2014-09-25  0:23 ` [PATCH 7/7] arm64/kexec: Enable kexec in the arm64 defconfig Geoff Levand
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

Some of the macros defined in kvm_arm.h are useful in the exception vector
routines, but they are not compatible with the assembler.  Change the
definition of ESR_EL2_IL (and hence the derived ESR_EL2_ISS) to be compatible.

Fixes build errors like these when using kvm_arm.h in assembly
source files:

  Error: unexpected characters following instruction at operand 3 -- `and x0,x1,#((1U<<25)-1)'

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/kvm_arm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520..fb42ab5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -175,7 +175,7 @@
 /* Exception Syndrome Register (ESR) bits */
 #define ESR_EL2_EC_SHIFT	(26)
 #define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
-#define ESR_EL2_IL		(1U << 25)
+#define ESR_EL2_IL		(UL(1) << 25)
 #define ESR_EL2_ISS		(ESR_EL2_IL - 1)
 #define ESR_EL2_ISV_SHIFT	(24)
 #define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
@ 2014-09-25  0:23 Geoff Levand
  2014-09-25  0:23 ` [PATCH 3/7] arm64: Add new hcall HVC_CALL_FUNC Geoff Levand
                   ` (10 more replies)
  0 siblings, 11 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

Hi All,

This series adds the core support for kexec re-boots on arm64.  I have tested
with the ARM VE fast model using various kernel config options for both the
first and second stage kernels.

To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
kexec-tools [2], which have not yet been merged upstream, are needed.

This series does not include some re-work of the spin-table CPU enable method
that is needed to support it, nor does it include some re-work of KVM to support
CPU soft reset.  A kernel built with these patches will boot and run correctly,
but will fail to load a kexec kernel if running on a machine with any spin-table
enabled CPUs and will fail the kexec re-boot if the first stage kernel was built
with CONFIG_KVM=y.  Work-in-progress patches to support these are in the master
branch of my linux-kexec repository [1].

Patches 1-4 rework the arm64 hcall mechanism to give the arm64 soft_restart()
routine the ability to switch exception levels from EL1 to EL2 for kernels that
were entered in EL2.

Patch 5 moves proc-macros.S from arm64/mm to arm64/include/asm so that the
dcache_line_size macro it defines can be used by kexec's relocate kernel
routine.

Patches 6 and 7 add the actual kexec support.

Please consider all patches for inclusion.  Any comments or suggestions on how 
to improve are welcome.

[1]  https://git.linaro.org/people/geoff.levand/linux-kexec.git
[2]  https://git.linaro.org/people/geoff.levand/kexec-tools.git

-Geoff

The following changes since commit 1059c6bf8534acda249e7e65c81e7696fb074dc1:

  arm64: debug: don't re-enable debug exceptions on return from el1_dbg (2014-09-23 15:49:34 +0100)

are available in the git repository at:

  git://git.linaro.org/people/geoff.levand/linux-kexec.git kexec-v3

for you to fetch changes up to d0dcec391ae430d2c0aa2ba770c4c21b1414fa68:

  arm64/kexec: Enable kexec in the arm64 defconfig (2014-09-24 17:18:32 -0700)

----------------------------------------------------------------
Geoff Levand (7):
      arm64/kvm: Fix assembler compatibility of macros
      arm64: Convert hcalls to use ISS field
      arm64: Add new hcall HVC_CALL_FUNC
      arm64: Add EL2 switch to soft_restart
      arm64: Move proc-macros.S to include/asm
      arm64/kexec: Add core kexec support
      arm64/kexec: Enable kexec in the arm64 defconfig

 arch/arm64/Kconfig                           |   9 ++
 arch/arm64/configs/defconfig                 |   1 +
 arch/arm64/include/asm/kexec.h               |  47 +++++++
 arch/arm64/include/asm/kvm_arm.h             |   2 +-
 arch/arm64/include/asm/proc-fns.h            |   4 +-
 arch/arm64/{mm => include/asm}/proc-macros.S |   0
 arch/arm64/include/asm/virt.h                |  33 +++++
 arch/arm64/kernel/Makefile                   |   1 +
 arch/arm64/kernel/hyp-stub.S                 |  44 +++++--
 arch/arm64/kernel/machine_kexec.c            | 183 +++++++++++++++++++++++++++
 arch/arm64/kernel/process.c                  |   6 +-
 arch/arm64/kernel/relocate_kernel.S          | 183 +++++++++++++++++++++++++++
 arch/arm64/kvm/hyp.S                         |  18 ++-
 arch/arm64/mm/cache.S                        |   3 +-
 arch/arm64/mm/proc.S                         |  50 ++++++--
 include/uapi/linux/kexec.h                   |   1 +
 16 files changed, 546 insertions(+), 39 deletions(-)
 create mode 100644 arch/arm64/include/asm/kexec.h
 rename arch/arm64/{mm => include/asm}/proc-macros.S (100%)
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

-- 
1.9.1

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 3/7] arm64: Add new hcall HVC_CALL_FUNC
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-09-25  0:23 ` [PATCH 4/7] arm64: Add EL2 switch to soft_restart Geoff Levand
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
During CPU reset the CPU must be brought to the exception level it had on
entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
needed for this exception level switch.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 13 +++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 17 ++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 99c319c..055e8d8 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -41,6 +41,19 @@
 
 #define HVC_CALL_HYP 3
 
+/*
+ * HVC_CALL_FUNC - Execute a function at EL2.
+ *
+ * @x0: Physical address of the function to be executed.
+ * @x1: Passed as the first argument to the function.
+ * @x2: Passed as the second argument to the function.
+ * @x3: Passed as the third argument to the function.
+ *
+ * The called function must preserve the contents of register x18.
+ */
+
+#define HVC_CALL_FUNC 4
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 8b39522..8dfb3a6 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -65,9 +65,20 @@ el1_sync:
 	mrs	x0, vbar_el2
 	b	2f
 
-1:	cmp	x18, #HVC_SET_VECTORS
-	b.ne	2f
-	msr	vbar_el2, x0
+1:	cmp     x18, #HVC_SET_VECTORS
+	b.ne    1f
+	msr     vbar_el2, x0
+	b       2f
+
+1:	cmp     x18, #HVC_CALL_FUNC
+	b.ne    2f
+	mov     x18, lr
+	mov     lr, x0
+	mov     x0, x1
+	mov     x1, x2
+	mov     x2, x3
+	blr     lr
+	mov     lr, x18
 
 2:	eret
 ENDPROC(el1_sync)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 2/7] arm64: Convert hcalls to use ISS field
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
  2014-09-25  0:23 ` [PATCH 3/7] arm64: Add new hcall HVC_CALL_FUNC Geoff Levand
  2014-09-25  0:23 ` [PATCH 4/7] arm64: Add EL2 switch to soft_restart Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-10-03 10:51   ` Mark Rutland
  2014-09-25  0:23 ` [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros Geoff Levand
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

To allow for additional hcalls to be defined and to make the arm64 hcall API
more consistent across exception vector routines change the hcall implementations
to use the ISS field of the ESR_EL2 register to specify the hcall type.

The existing arm64 hcall implementations are limited in that they only allow
for two distinct hcalls; with the x0 register either zero, or not zero.  Also,
the API of the hyp-stub exception vector routines and the KVM exception vector
routines differ; hyp-stub uses a non-zero value in x0 to implement
__hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp.

Define three new preprocessor macros HVC_GET_VECTORS, HVC_SET_VECTORS and
HVC_CALL_HYP to be used as hcall type specifiers, and convert the
existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
to use these new macros when executing an HVC call.  Also change the
corresponding hyp-stub and KVM el1_sync exception vector routines to use these
new macros.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 20 ++++++++++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 33 +++++++++++++++++++++------------
 arch/arm64/kvm/hyp.S          | 18 +++++++++++-------
 3 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df52..99c319c 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -21,6 +21,26 @@
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+/*
+ * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
+ */
+
+#define HVC_GET_VECTORS 1
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x0: Physical address of the new vector table.
+ */
+
+#define HVC_SET_VECTORS 2
+
+/*
+ * HVC_CALL_HYP - Execute a hyp routine.
+ */
+
+#define HVC_CALL_HYP 3
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..8b39522 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -53,14 +53,22 @@ ENDPROC(__hyp_stub_vectors)
 	.align 11
 
 el1_sync:
-	mrs	x1, esr_el2
-	lsr	x1, x1, #26
-	cmp	x1, #0x16
-	b.ne	2f				// Not an HVC trap
-	cbz	x0, 1f
-	msr	vbar_el2, x0			// Set vbar_el2
+	mrs	x18, esr_el2
+	lsr	x17, x18, #26			// x17=EC
+	and	x18, x18, #0xffffff		// x18=ISS
+
+	cmp     x17, #0x16
+	b.ne    2f				// Not an HVC trap
+
+	cmp	x18, #HVC_GET_VECTORS
+	b.ne	1f
+	mrs	x0, vbar_el2
 	b	2f
-1:	mrs	x0, vbar_el2			// Return vbar_el2
+
+1:	cmp	x18, #HVC_SET_VECTORS
+	b.ne	2f
+	msr	vbar_el2, x0
+
 2:	eret
 ENDPROC(el1_sync)
 
@@ -100,11 +108,12 @@ ENDPROC(\label)
  * initialisation entry point.
  */
 
-ENTRY(__hyp_get_vectors)
-	mov	x0, xzr
-	// fall through
 ENTRY(__hyp_set_vectors)
-	hvc	#0
+	hvc	#HVC_SET_VECTORS
 	ret
-ENDPROC(__hyp_get_vectors)
 ENDPROC(__hyp_set_vectors)
+
+ENTRY(__hyp_get_vectors)
+	hvc	#HVC_GET_VECTORS
+	ret
+ENDPROC(__hyp_get_vectors)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b72aa9f..9607f15 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -26,6 +26,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/virt.h>
 
 #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
@@ -1105,12 +1106,9 @@ __hyp_panic_str:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
- * A function pointer with a value of 0 has a special meaning, and is
- * used to implement __hyp_get_vectors in the same way as in
- * arch/arm64/kernel/hyp_stub.S.
  */
 ENTRY(kvm_call_hyp)
-	hvc	#0
+	hvc	#HVC_CALL_HYP
 	ret
 ENDPROC(kvm_call_hyp)
 
@@ -1140,6 +1138,7 @@ el1_sync:					// Guest trapped into EL2
 	push	x2, x3
 
 	mrs	x1, esr_el2
+	and	x0, x1, #ESR_EL2_ISS
 	lsr	x2, x1, #ESR_EL2_EC_SHIFT
 
 	cmp	x2, #ESR_EL2_EC_HVC64
@@ -1149,15 +1148,19 @@ el1_sync:					// Guest trapped into EL2
 	cbnz	x3, el1_trap			// called HVC
 
 	/* Here, we're pretty sure the host called HVC. */
+	mov	x18, x0
 	pop	x2, x3
 	pop	x0, x1
 
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
+	cmp	x18, #HVC_GET_VECTORS
+	b.ne	1f
 	mrs	x0, vbar_el2
 	b	2f
 
-1:	push	lr, xzr
+1:	cmp	x18, #HVC_CALL_HYP
+	b.ne	2f
+
+	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
@@ -1170,6 +1173,7 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
+
 2:	eret
 
 el1_trap:
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (5 preceding siblings ...)
  2014-09-25  0:23 ` [PATCH 5/7] arm64: Move proc-macros.S to include/asm Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-09-25 18:28   ` Vivek Goyal
                     ` (2 more replies)
  2014-09-30 20:29 ` [PATCH 0/7] arm64 kexec kernel patches V3 Vivek Goyal
                   ` (3 subsequent siblings)
  10 siblings, 3 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
arm64 architecture that add support for the kexec re-boot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |   9 ++
 arch/arm64/include/asm/kexec.h      |  47 +++++++++
 arch/arm64/kernel/Makefile          |   1 +
 arch/arm64/kernel/machine_kexec.c   | 183 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 183 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 424 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f0d3a2d..af03449 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -313,6 +313,15 @@ config ARCH_HAS_CACHE_LINE_SIZE
 
 source "mm/Kconfig"
 
+config KEXEC
+	depends on (!SMP || PM_SLEEP_SMP)
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..e7bd7ab
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,47 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_ARM64_KEXEC_H)
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_ARM64
+
+#if !defined(__ASSEMBLY__)
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6e9538c..77a7351 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
 arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..22d185c
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,183 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
+
+/* Global variables for the relocate_kernel routine. */
+
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern unsigned long kexec_dtb_addr;
+extern unsigned long kexec_kimage_head;
+extern unsigned long kexec_kimage_start;
+
+/**
+ * kexec_list_walk - Helper to walk the kimage page list.
+ */
+
+static void kexec_list_walk(void *ctx, unsigned long kimage_head,
+	void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
+{
+	void *dest;
+	unsigned long *entry;
+
+	for (entry = &kimage_head, dest = NULL; ; entry++) {
+		unsigned int flag = *entry & 
+			(IND_DESTINATION | IND_INDIRECTION | IND_DONE |
+			IND_SOURCE);
+		void *addr = phys_to_virt(*entry & PAGE_MASK);
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (unsigned long *)addr - 1;
+			cb(ctx, flag, addr, NULL);
+			break;
+		case IND_DESTINATION:
+			dest = addr;
+			cb(ctx, flag, addr, NULL);
+			break;
+		case IND_SOURCE:
+			cb(ctx, flag, addr, dest);
+			dest += PAGE_SIZE;
+			break;
+		case IND_DONE:
+			cb(ctx, flag , NULL, NULL);
+			return;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * kexec_is_dtb - Helper routine to check the device tree header signature.
+ */
+
+static bool kexec_is_dtb(const void *dtb)
+{
+	__be32 magic;
+
+	return get_user(magic, (__be32 *)dtb) ? false :
+		(be32_to_cpu(magic) == OF_DT_HEADER);
+}
+
+/**
+ * kexec_find_dtb_seg - Helper routine to find the dtb segment.
+ */
+
+static const struct kexec_segment *kexec_find_dtb_seg(
+	const struct kimage *image)
+{
+	int i;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		if (kexec_is_dtb(image->segment[i].buf))
+			return &image->segment[i];
+	}
+
+	return NULL;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	const struct kexec_segment *dtb_seg;
+
+	dtb_seg = kexec_find_dtb_seg(image);
+
+	if (!dtb_seg)
+		return -EINVAL;
+
+	kexec_dtb_addr = dtb_seg->mem;
+	kexec_kimage_start = image->start;
+
+	return 0;
+}
+
+/**
+ * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
+ */
+
+static void kexec_list_flush_cb(void *ctx , unsigned int flag,
+	void *addr, void *dest)
+{
+	switch (flag) {
+	case IND_INDIRECTION:
+	case IND_SOURCE:
+		__flush_dcache_area(addr, PAGE_SIZE);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+
+void machine_kexec(struct kimage *image)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	BUG_ON(num_online_cpus() > 1);
+
+	kexec_kimage_head = image->head;
+
+	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
+	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+	/*
+	 * Copy relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+		relocate_new_kernel_size);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+
+	__flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+
+	kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	soft_restart(reboot_code_buffer_phys);
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..07ce41a
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,183 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/proc-macros.S>
+
+/* The list entry flags. */
+
+#define IND_DESTINATION_BIT 0
+#define IND_INDIRECTION_BIT 1
+#define IND_DONE_BIT        2
+#define IND_SOURCE_BIT      3
+
+/*
+ * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when coping the
+ * new kernel to its final location.  To assure that the relocate_new_kernel
+ * routine which does that copy is not overwritten all code and data needed
+ * by relocate_new_kernel must be between the symbols relocate_new_kernel and
+ * relocate_new_kernel_end.  The machine_kexec() routine will copy
+ * relocate_new_kernel to the kexec control_code_page, a special page which
+ * has been set up to be preserved during the kernel copy operation.
+ */
+
+.globl relocate_new_kernel
+relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+
+	ldr	x18, kexec_kimage_head		/* x18 = list entry */
+	dcache_line_size x17, x0		/* x17 = dcache line size */
+	mov	x16, xzr			/* x16 = segment start */
+	mov	x15, xzr			/* x15 = entry ptr */
+	mov	x14, xzr			/* x14 = copy dest */
+
+	/* Check if the new kernel needs relocation. */
+
+	cbz	x18, .Ldone
+	tbnz	x18, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x13, x18, PAGE_MASK		/* x13 = addr */
+
+	/* Test the entry flags. */
+
+.Ltest_source:
+	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* copy_page(x20 = dest, x21 = src) */
+
+	mov x20, x14
+	mov x21, x13
+
+1:	ldp	x22, x23, [x21]
+	ldp	x24, x25, [x21, #16]
+	ldp	x26, x27, [x21, #32]
+	ldp	x28, x29, [x21, #48]
+	add	x21, x21, #64
+	stnp	x22, x23, [x20]
+	stnp	x24, x25, [x20, #16]
+	stnp	x26, x27, [x20, #32]
+	stnp	x28, x29, [x20, #48]
+	add	x20, x20, #64
+	tst	x21, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	/* dest += PAGE_SIZE */
+
+	add	x14, x14, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+
+	mov	x15, x13
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x18, IND_DESTINATION_BIT, .Lnext
+
+	/* flush segment */
+
+	bl	.Lflush
+	mov	x16, x13
+
+	/* dest = addr */
+
+	mov	x14, x13
+
+.Lnext:
+	/* entry = *ptr++ */
+
+	ldr	x18, [x15], #8
+
+	/* while (!(entry & DONE)) */
+
+	tbz	x18, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	/* flush last segment */
+
+	bl	.Lflush
+
+	dsb	sy
+	isb
+	ic	ialluis
+	dsb	sy
+	isb
+
+	/* start_new_kernel */
+
+	ldr	x4, kexec_kimage_start
+	ldr	x0, kexec_dtb_addr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x4
+
+/* flush - x17 = line size, x16 = start addr, x14 = end addr. */
+
+.Lflush:
+	cbz	x16, 2f
+	mov	x0, x16
+	sub	x1, x17, #1
+	bic	x0, x0, x1
+1:	dc	civac, x0
+	add	x0, x0, x17
+	cmp	x0, x14
+	b.lo	1b
+2:	ret
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+/* The machine_kexec routines set these variables. */
+
+/*
+ * kexec_dtb_addr - Physical address of the new kernel's device tree.
+ */
+
+.globl kexec_dtb_addr
+kexec_dtb_addr:
+	.quad	0x0
+
+/*
+ * kexec_kimage_head - Copy of image->head, the list of kimage entries.
+ */
+
+.globl kexec_kimage_head
+kexec_kimage_head:
+	.quad	0x0
+
+/*
+ * kexec_kimage_start - Copy of image->start, the entry point of the new kernel.
+ */
+
+.globl kexec_kimage_start
+kexec_kimage_start:
+	.quad	0x0
+
+.Lrelocate_new_kernel_end:
+
+/*
+ * relocate_new_kernel_size - Number of bytes to copy to the control_code_page.
+ */
+
+.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.quad .Lrelocate_new_kernel_end - relocate_new_kernel
+
+.org	KEXEC_CONTROL_PAGE_SIZE
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 6925f5b..04626b9 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_ARM64   (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 5/7] arm64: Move proc-macros.S to include/asm
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (4 preceding siblings ...)
  2014-09-25  0:23 ` [PATCH 7/7] arm64/kexec: Enable kexec in the arm64 defconfig Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-09-25  0:23 ` [PATCH 6/7] arm64/kexec: Add core kexec support Geoff Levand
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

To allow the assembler macros defined in proc-macros.S to be used outside
the mm code move the proc-macros.S file from arch/arm64/mm/ to
arch/arm64/include/asm/ and fix up any preprocessor includes to use the new
file location.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/proc-macros.S | 54 ++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/cache.S                |  3 +-
 arch/arm64/mm/proc-macros.S          | 54 ------------------------------------
 arch/arm64/mm/proc.S                 |  3 +-
 4 files changed, 56 insertions(+), 58 deletions(-)
 create mode 100644 arch/arm64/include/asm/proc-macros.S
 delete mode 100644 arch/arm64/mm/proc-macros.S

diff --git a/arch/arm64/include/asm/proc-macros.S b/arch/arm64/include/asm/proc-macros.S
new file mode 100644
index 0000000..005d29e
--- /dev/null
+++ b/arch/arm64/include/asm/proc-macros.S
@@ -0,0 +1,54 @@
+/*
+ * Based on arch/arm/mm/proc-macros.S
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+/*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+	.macro	vma_vm_mm, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_MM]
+	.endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+	.macro	mmid, rd, rn
+	ldr	\rd, [\rn, #MM_CONTEXT_ID]
+	.endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	and	\tmp, \tmp, #0xf		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2366383..4e811a4 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -20,8 +20,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-
-#include "proc-macros.S"
+#include <asm/proc-macros.S>
 
 /*
  *	__flush_dcache_all()
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index 005d29e..0000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-	.macro	vma_vm_mm, rd, rn
-	ldr	\rd, [\rn, #VMA_VM_MM]
-	.endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
-	.macro	mmid, rd, rn
-	ldr	\rd, [\rn, #MM_CONTEXT_ID]
-	.endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
-	.macro	dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
-	.macro	icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7467199..b767032 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,10 +25,9 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/proc-macros.S>
 #include <asm/virt.h>
 
-#include "proc-macros.S"
-
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
 #else
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 7/7] arm64/kexec: Enable kexec in the arm64 defconfig
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (3 preceding siblings ...)
  2014-09-25  0:23 ` [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros Geoff Levand
@ 2014-09-25  0:23 ` Geoff Levand
  2014-09-25  0:23 ` [PATCH 5/7] arm64: Move proc-macros.S to include/asm Geoff Levand
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-25  0:23 UTC (permalink / raw)
  To: linux-arm-kernel

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d92ef3c..ebf8b3f 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -39,6 +39,7 @@ CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_KEXEC=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-25  0:23 ` [PATCH 6/7] arm64/kexec: Add core kexec support Geoff Levand
@ 2014-09-25 18:28   ` Vivek Goyal
  2014-09-25 19:02     ` Geoff Levand
  2014-09-30 18:18   ` Vivek Goyal
  2014-09-30 23:59   ` [PATCH V2 " Geoff Levand
  2 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-09-25 18:28 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 12:23:27AM +0000, Geoff Levand wrote:
[..]
> +void machine_kexec(struct kimage *image)
> +{
> +	phys_addr_t reboot_code_buffer_phys;
> +	void *reboot_code_buffer;
> +
> +	BUG_ON(num_online_cpus() > 1);
> +
> +	kexec_kimage_head = image->head;
> +
> +	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
> +	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> +
> +	/*
> +	 * Copy relocate_new_kernel to the reboot_code_buffer for use
> +	 * after the kernel is shut down.
> +	 */
> +
> +	memcpy(reboot_code_buffer, relocate_new_kernel,
> +		relocate_new_kernel_size);
> +
> +	/* Flush the reboot_code_buffer in preparation for its execution. */
> +
> +	__flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
> +
> +	/* Flush the kimage list. */
> +
> +	kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
> +
> +	pr_info("Bye!\n");
> +
> +	/* Disable all DAIF exceptions. */
> +	
> +	asm volatile ("msr daifset, #0xf" : : : "memory");
> +
> +	soft_restart(reboot_code_buffer_phys);

So what is soft_restart() functionality in arm64?

Looks like it switches to identity mapped page tables and that seems
to be the reason that you are not preparing identity mapped page
tables in kexec code. I am wondering how you make sure that once
kexec is swapping pages (putting new kernel's pages to its destination)
at that time these identity pages will not be overwritten?

I am assuming that you are jumping to purgatory with paging enabled
and whole of the memory identity mapped.

I am also curious to know what are different entry points arm64
kernel image supports and which one are you using by default.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-25 18:28   ` Vivek Goyal
@ 2014-09-25 19:02     ` Geoff Levand
  2014-09-25 19:08       ` Vivek Goyal
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-09-25 19:02 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Thu, 2014-09-25 at 14:28 -0400, Vivek Goyal wrote:
> On Thu, Sep 25, 2014 at 12:23:27AM +0000, Geoff Levand wrote:
> [..]
> > +void machine_kexec(struct kimage *image)
> > +{
> > +	phys_addr_t reboot_code_buffer_phys;
> > +	void *reboot_code_buffer;
> > +
> > +	BUG_ON(num_online_cpus() > 1);
> > +
> > +	kexec_kimage_head = image->head;
> > +
> > +	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
> > +	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> > +
> > +	/*
> > +	 * Copy relocate_new_kernel to the reboot_code_buffer for use
> > +	 * after the kernel is shut down.
> > +	 */
> > +
> > +	memcpy(reboot_code_buffer, relocate_new_kernel,
> > +		relocate_new_kernel_size);
> > +
> > +	/* Flush the reboot_code_buffer in preparation for its execution. */
> > +
> > +	__flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
> > +
> > +	/* Flush the kimage list. */
> > +
> > +	kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
> > +
> > +	pr_info("Bye!\n");
> > +
> > +	/* Disable all DAIF exceptions. */
> > +	
> > +	asm volatile ("msr daifset, #0xf" : : : "memory");
> > +
> > +	soft_restart(reboot_code_buffer_phys);
> 
> So what is soft_restart() functionality in arm64?

soft_restart() basically turns off the MMU and data caches, then jumps
to the address passed to it, reboot_code_buffer_phys here.
 
> Looks like it switches to identity mapped page tables and that seems
> to be the reason that you are not preparing identity mapped page
> tables in kexec code. I am wondering I how do you make sure that once
> kexec is swapping pages (putting new kernel's pages to its destination)
> at that time these identity page will not be overwritten?
> 
> I am assuming that you are jumping to purgatory with paging enabled
> and whole of the memory identity mapped.

The identity map is just used to turn off the MMU.  soft_restart() is in
that identity mapping, and once it shuts off the MMU it jumps to the
physical address of relocate_kernel, which uses physical addressing to
do the copy.

> I am also curious to know what are different entry points arm64
> kernel image supports and which one are you using by default.

The arm64 kernel has a single entry point, the start of the image.  See
Documentation/arm64/booting.txt.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-25 19:02     ` Geoff Levand
@ 2014-09-25 19:08       ` Vivek Goyal
  0 siblings, 0 replies; 65+ messages in thread
From: Vivek Goyal @ 2014-09-25 19:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 12:02:51PM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Thu, 2014-09-25 at 14:28 -0400, Vivek Goyal wrote:
> > On Thu, Sep 25, 2014 at 12:23:27AM +0000, Geoff Levand wrote:
> > [..]
> > > +void machine_kexec(struct kimage *image)
> > > +{
> > > +	phys_addr_t reboot_code_buffer_phys;
> > > +	void *reboot_code_buffer;
> > > +
> > > +	BUG_ON(num_online_cpus() > 1);
> > > +
> > > +	kexec_kimage_head = image->head;
> > > +
> > > +	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
> > > +	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> > > +
> > > +	/*
> > > +	 * Copy relocate_new_kernel to the reboot_code_buffer for use
> > > +	 * after the kernel is shut down.
> > > +	 */
> > > +
> > > +	memcpy(reboot_code_buffer, relocate_new_kernel,
> > > +		relocate_new_kernel_size);
> > > +
> > > +	/* Flush the reboot_code_buffer in preparation for its execution. */
> > > +
> > > +	__flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
> > > +
> > > +	/* Flush the kimage list. */
> > > +
> > > +	kexec_list_walk(NULL, image->head, kexec_list_flush_cb);
> > > +
> > > +	pr_info("Bye!\n");
> > > +
> > > +	/* Disable all DAIF exceptions. */
> > > +	
> > > +	asm volatile ("msr daifset, #0xf" : : : "memory");
> > > +
> > > +	soft_restart(reboot_code_buffer_phys);
> > 
> > So what is soft_restart() functionality in arm64?
> 
> soft_restart() basically turns off the MMU and data caches, then jumps
> to the address passed to it, reboot_code_buffer_phys here.
>  
> > Looks like it switches to identity mapped page tables and that seems
> > to be the reason that you are not preparing identity mapped page
> > tables in kexec code. I am wondering I how do you make sure that once
> > kexec is swapping pages (putting new kernel's pages to its destination)
> > at that time these identity page will not be overwritten?
> > 
> > I am assuming that you are jumping to purgatory with paging enabled
> > and whole of the memory identity mapped.
> 
> The identity map is just used to turn off the MMU.  soft_restart() is in
> that identity mapping, and once it shuts off the MMU it jumps to the
> physical address of relocate_kernel, which uses physical addressing to
> do the copy.

Hi Geoff,

Ok, thanks. I think it would be nice if this explanation appears in code
somewhere as a comment.

Being able to turn off the MMU seems to have simplified things.

> 
> > I am also curious to know what are different entry points arm64
> > kernel image supports and which one are you using by default.
> 
> The arm64 kernel as a single entry, the start of the image.  See
> Documentation/arm64/booting.txt.

I will go through it.

Thanks
Vivek

> 
> -Geoff
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-25  0:23 ` [PATCH 6/7] arm64/kexec: Add core kexec support Geoff Levand
  2014-09-25 18:28   ` Vivek Goyal
@ 2014-09-30 18:18   ` Vivek Goyal
  2014-09-30 19:54     ` Geoff Levand
  2014-09-30 23:59   ` [PATCH V2 " Geoff Levand
  2 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-09-30 18:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 12:23:27AM +0000, Geoff Levand wrote:

[..]
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> new file mode 100644
> index 0000000..22d185c
> --- /dev/null
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -0,0 +1,183 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kexec.h>
> +#include <linux/of_fdt.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/system_misc.h>
> +
> +/* Global variables for the relocate_kernel routine. */
> +
> +extern const unsigned char relocate_new_kernel[];
> +extern const unsigned long relocate_new_kernel_size;
> +extern unsigned long kexec_dtb_addr;
> +extern unsigned long kexec_kimage_head;
> +extern unsigned long kexec_kimage_start;
> +
> +/**
> + * kexec_list_walk - Helper to walk the kimage page list.
> + */
> +
> +static void kexec_list_walk(void *ctx, unsigned long kimage_head,
> +	void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
> +{
> +	void *dest;
> +	unsigned long *entry;

Hi Geoff,

I see only one user of this function, kexec_list_flush_cb(). So why
not directly embed needed logic in kexec_list_flush_cb() instead of
implementing a generic function. It would be simpler as you seem to
be flushing dcache only for SOURCE and IND pages and rest you 
can simply ignore.

> +
> +	for (entry = &kimage_head, dest = NULL; ; entry++) {
> +		unsigned int flag = *entry & 
> +			(IND_DESTINATION | IND_INDIRECTION | IND_DONE |
> +			IND_SOURCE);
> +		void *addr = phys_to_virt(*entry & PAGE_MASK);
> +
> +		switch (flag) {
> +		case IND_INDIRECTION:
> +			entry = (unsigned long *)addr - 1;
> +			cb(ctx, flag, addr, NULL);
> +			break;
> +		case IND_DESTINATION:
> +			dest = addr;
> +			cb(ctx, flag, addr, NULL);
> +			break;
> +		case IND_SOURCE:
> +			cb(ctx, flag, addr, dest);
> +			dest += PAGE_SIZE;
> +			break;
> +		case IND_DONE:
> +			cb(ctx, flag , NULL, NULL);
> +			return;
> +		default:
> +			break;
> +		}
> +	}
> +}
> +
> +/**
> + * kexec_is_dtb - Helper routine to check the device tree header signature.
> + */
> +
> +static bool kexec_is_dtb(const void *dtb)
> +{
> +	__be32 magic;
> +
> +	return get_user(magic, (__be32 *)dtb) ? false :
> +		(be32_to_cpu(magic) == OF_DT_HEADER);
> +}
> +
> +/**
> + * kexec_find_dtb_seg - Helper routine to find the dtb segment.
> + */
> +
> +static const struct kexec_segment *kexec_find_dtb_seg(
> +	const struct kimage *image)
> +{
> +	int i;
> +
> +	for (i = 0; i < image->nr_segments; i++) {
> +		if (kexec_is_dtb(image->segment[i].buf))
> +			return &image->segment[i];
> +	}
> +
> +	return NULL;
> +}

So this implementation makes passing dtb mandatory. So it will not work
with ACPI?

Where is dtb present? How is it passed to first kernel? Can it still
be around in memory and second kernel can access it?

I mean in ACPI world on x86, all the ACPI info is still present and second
kernel can access it without it being explicitly to second kernel in
memory. Can something similar happen for dtb?

[..]
> +/**
> + * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
> + */
> +
> +static void kexec_list_flush_cb(void *ctx , unsigned int flag,
> +	void *addr, void *dest)
			  ^^^

Nobody seems to be making use of dest. So why introduce it?

> +{
> +	switch (flag) {
> +	case IND_INDIRECTION:
> +	case IND_SOURCE:
> +		__flush_dcache_area(addr, PAGE_SIZE);
> +		break;

So what does __flush_dcache_area() do? Flush data caches. IIUC, addr
is virtual address at this point of time. While copying pages and
walking through the list, I am assuming you have switched off page
tables and you are in some kind of 1:1 physical mode. So how did
flushing data caches related to a virtual address help. I guess we
are not even accessing that virtual address now. 
 
[..]
> --- /dev/null
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -0,0 +1,183 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <asm/assembler.h>
> +#include <asm/kexec.h>
> +#include <asm/memory.h>
> +#include <asm/page.h>
> +#include <asm/proc-macros.S>
> +
> +/* The list entry flags. */
> +
> +#define IND_DESTINATION_BIT 0
> +#define IND_INDIRECTION_BIT 1
> +#define IND_DONE_BIT        2
> +#define IND_SOURCE_BIT      3

I thought you had some patches to move these into generic header file. You
got rid of those patches?

> +
> +/*
> + * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
> + *
> + * The memory that the old kernel occupies may be overwritten when coping the
> + * new kernel to its final location.  To assure that the relocate_new_kernel
> + * routine which does that copy is not overwritten all code and data needed
> + * by relocate_new_kernel must be between the symbols relocate_new_kernel and
> + * relocate_new_kernel_end.  The machine_kexec() routine will copy
> + * relocate_new_kernel to the kexec control_code_page, a special page which
> + * has been set up to be preserved during the kernel copy operation.
> + */
> +
> +.globl relocate_new_kernel
> +relocate_new_kernel:
> +
> +	/* Setup the list loop variables. */
> +
> +	ldr	x18, kexec_kimage_head		/* x18 = list entry */
> +	dcache_line_size x17, x0		/* x17 = dcache line size */
> +	mov	x16, xzr			/* x16 = segment start */
> +	mov	x15, xzr			/* x15 = entry ptr */
> +	mov	x14, xzr			/* x14 = copy dest */
> +
> +	/* Check if the new kernel needs relocation. */

What's "relocation" in this  context. I guess you are checking if new
kernel needs to be moved to destination location or not.

[..]
> +/*
> + * kexec_dtb_addr - Physical address of the new kernel's device tree.
> + */
> +
> +.globl kexec_dtb_addr
> +kexec_dtb_addr:
> +	.quad	0x0

As these gloabls are very arm64 specific, will it make sense to prefix
arm64_ before these. arm64_kexec_dtb_addr. Or arch_kexec_dtb_addr.


> +
> +/*
> + * kexec_kimage_head - Copy of image->head, the list of kimage entries.
> + */
> +
> +.globl kexec_kimage_head
> +kexec_kimage_head:
> +	.quad	0x0

Same here. How about arch_kexec_kimage_head.

> +
> +/*
> + * kexec_kimage_start - Copy of image->start, the entry point of the new kernel.
> + */
> +
> +.globl kexec_kimage_start
> +kexec_kimage_start:
> +	.quad	0x0

arch_kexec_kimage_start.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-30 18:18   ` Vivek Goyal
@ 2014-09-30 19:54     ` Geoff Levand
  2014-10-01 14:56       ` Vivek Goyal
  2014-10-01 16:16       ` Mark Rutland
  0 siblings, 2 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-30 19:54 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Tue, 2014-09-30 at 14:18 -0400, Vivek Goyal wrote:
> On Thu, Sep 25, 2014 at 12:23:27AM +0000, Geoff Levand wrote:
> 
> [..]
> > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> > new file mode 100644
> > index 0000000..22d185c
> > --- /dev/null
> > +++ b/arch/arm64/kernel/machine_kexec.c
> > @@ -0,0 +1,183 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/kexec.h>
> > +#include <linux/of_fdt.h>
> > +#include <linux/slab.h>
> > +#include <linux/uaccess.h>
> > +
> > +#include <asm/cacheflush.h>
> > +#include <asm/system_misc.h>
> > +
> > +/* Global variables for the relocate_kernel routine. */
> > +
> > +extern const unsigned char relocate_new_kernel[];
> > +extern const unsigned long relocate_new_kernel_size;
> > +extern unsigned long kexec_dtb_addr;
> > +extern unsigned long kexec_kimage_head;
> > +extern unsigned long kexec_kimage_start;
> > +
> > +/**
> > + * kexec_list_walk - Helper to walk the kimage page list.
> > + */
> > +
> > +static void kexec_list_walk(void *ctx, unsigned long kimage_head,
> > +	void (*cb)(void *ctx, unsigned int flag, void *addr, void *dest))
> > +{
> > +	void *dest;
> > +	unsigned long *entry;
> 
> Hi Geoff,
> 
> I see only one user of this function, kexec_list_flush_cb(). So why
> not directly embed needed logic in kexec_list_flush_cb() instead of
> implementing a generic function. It would be simpler as you seem to
> be flushing dcache only for SOURCE and IND pages and rest you 
> can simply ignore.

I have an additional debugging patch that uses this to dump the list.
I can move this routine into that patch and put in a simpler version
here.

> > +
> > +	for (entry = &kimage_head, dest = NULL; ; entry++) {
> > +		unsigned int flag = *entry & 
> > +			(IND_DESTINATION | IND_INDIRECTION | IND_DONE |
> > +			IND_SOURCE);
> > +		void *addr = phys_to_virt(*entry & PAGE_MASK);
> > +
> > +		switch (flag) {
> > +		case IND_INDIRECTION:
> > +			entry = (unsigned long *)addr - 1;
> > +			cb(ctx, flag, addr, NULL);
> > +			break;
> > +		case IND_DESTINATION:
> > +			dest = addr;
> > +			cb(ctx, flag, addr, NULL);
> > +			break;
> > +		case IND_SOURCE:
> > +			cb(ctx, flag, addr, dest);
> > +			dest += PAGE_SIZE;
> > +			break;
> > +		case IND_DONE:
> > +			cb(ctx, flag , NULL, NULL);
> > +			return;
> > +		default:
> > +			break;
> > +		}
> > +	}
> > +}
> > +
> > +/**
> > + * kexec_is_dtb - Helper routine to check the device tree header signature.
> > + */
> > +
> > +static bool kexec_is_dtb(const void *dtb)
> > +{
> > +	__be32 magic;
> > +
> > +	return get_user(magic, (__be32 *)dtb) ? false :
> > +		(be32_to_cpu(magic) == OF_DT_HEADER);
> > +}
> > +
> > +/**
> > + * kexec_find_dtb_seg - Helper routine to find the dtb segment.
> > + */
> > +
> > +static const struct kexec_segment *kexec_find_dtb_seg(
> > +	const struct kimage *image)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < image->nr_segments; i++) {
> > +		if (kexec_is_dtb(image->segment[i].buf))
> > +			return &image->segment[i];
> > +	}
> > +
> > +	return NULL;
> > +}
> 
> So this implementation makes passing dtb mandatory. So it will not work
> with ACPI?

I have not yet considered ACPI.  It will most likely need to have
something done differently.  Secure boot will also need something
different, and I expect it will use your new kexec_file_load().

> Where is dtb present? How is it passed to first kernel? Can it still
> be around in memory and second kernel can access it?

The user space program (kexec-tools, etc.) passes a dtb.  That dtb
could be a copy of the currently one, or a new one specified by
the user.

> I mean in ACPI world on x86, all the ACPI info is still present and second
> kernel can access it without it being explicitly to second kernel in
> memory. Can something similar happen for dtb?

This implementation leaves the preparation of the 2nd stage dtb to
the user space program, as it can prepare that dtb with the proper
kernel command line property, initrd properties etc.

> [..]
> > +/**
> > + * kexec_list_flush_cb - Callback to flush the kimage list to PoC.
> > + */
> > +
> > +static void kexec_list_flush_cb(void *ctx , unsigned int flag,
> > +	void *addr, void *dest)
> 			  ^^^
> 
> Nobody seems to be making use of dest. So why introduce it?

As mentioned, I used this for dumping the list, and that callback
used dest.

> > +{
> > +	switch (flag) {
> > +	case IND_INDIRECTION:
> > +	case IND_SOURCE:
> > +		__flush_dcache_area(addr, PAGE_SIZE);
> > +		break;
> 
> So what does __flush_dcache_area() do? Flush data caches. IIUC, addr
> is virtual address at this point of time. While copying pages and
> walking through the list, I am assuming you have switched off page
> tables and you are in some kind of 1:1 physical mode. So how did
> flushing data caches related to a virtual address help. I guess we
> are not even accessing that virtual address now.

__flush_dcache_area(), and the underling aarch64 civac instruction
operate on virtual addresses.  Here we are still running with the
MMU on and the identity mapping has not yet been enabled.  This is
the sequence:

  flush dcache -> turn off MMU, dcache -> access memory (PoC) directly 

> [..]
> > --- /dev/null
> > +++ b/arch/arm64/kernel/relocate_kernel.S
> > @@ -0,0 +1,183 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <asm/assembler.h>
> > +#include <asm/kexec.h>
> > +#include <asm/memory.h>
> > +#include <asm/page.h>
> > +#include <asm/proc-macros.S>
> > +
> > +/* The list entry flags. */
> > +
> > +#define IND_DESTINATION_BIT 0
> > +#define IND_INDIRECTION_BIT 1
> > +#define IND_DONE_BIT        2
> > +#define IND_SOURCE_BIT      3
> 
> I thought you had some patches to move these into generic header file. You
> got rid of those patches?

I will have another patch to remove these when the kexec patches get
merged, or will just remove these if my kexec patches show up in
Catalin's arm64 tree.

> > +
> > +/*
> > + * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
> > + *
> > + * The memory that the old kernel occupies may be overwritten when coping the
> > + * new kernel to its final location.  To assure that the relocate_new_kernel
> > + * routine which does that copy is not overwritten all code and data needed
> > + * by relocate_new_kernel must be between the symbols relocate_new_kernel and
> > + * relocate_new_kernel_end.  The machine_kexec() routine will copy
> > + * relocate_new_kernel to the kexec control_code_page, a special page which
> > + * has been set up to be preserved during the kernel copy operation.
> > + */
> > +
> > +.globl relocate_new_kernel
> > +relocate_new_kernel:
> > +
> > +	/* Setup the list loop variables. */
> > +
> > +	ldr	x18, kexec_kimage_head		/* x18 = list entry */
> > +	dcache_line_size x17, x0		/* x17 = dcache line size */
> > +	mov	x16, xzr			/* x16 = segment start */
> > +	mov	x15, xzr			/* x15 = entry ptr */
> > +	mov	x14, xzr			/* x14 = copy dest */
> > +
> > +	/* Check if the new kernel needs relocation. */
> 
> What's "relocation" in this  context. I guess you are checking if new
> kernel needs to be moved to destination location or not.

Yes, relocate means scatter-gather 'copy' here.

> [..]
> > +/*
> > + * kexec_dtb_addr - Physical address of the new kernel's device tree.
> > + */
> > +
> > +.globl kexec_dtb_addr
> > +kexec_dtb_addr:
> > +	.quad	0x0
> 
> As these gloabls are very arm64 specific, will it make sense to prefix
> arm64_ before these. arm64_kexec_dtb_addr. Or arch_kexec_dtb_addr.

I could put an arm64_ prefix on, but this file and these variables are
arm64 specific so I thought it unnecessary.

I don't think arch_ would be right, as I don't expect any other arch to
have these variables.

I'll post a new patch version soon.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (6 preceding siblings ...)
  2014-09-25  0:23 ` [PATCH 6/7] arm64/kexec: Add core kexec support Geoff Levand
@ 2014-09-30 20:29 ` Vivek Goyal
  2014-09-30 21:27   ` Geoff Levand
  2014-10-01 15:19 ` Vivek Goyal
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-09-30 20:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:

[..]
> To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
> kexec-tools [2], which have not yet been merged upstream, are needed.
> 
> This series does not include some re-work of the spin-table CPU enable method
> that is needed to support it,

Hi Geoff,

How do I figure out if my system uses the spin-table enable method or the psci
enable method? Can one change it? I wanted to test your patches.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-09-30 20:29 ` [PATCH 0/7] arm64 kexec kernel patches V3 Vivek Goyal
@ 2014-09-30 21:27   ` Geoff Levand
  2014-10-02 19:08     ` Vivek Goyal
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-09-30 21:27 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Tue, 2014-09-30 at 16:29 -0400, Vivek Goyal wrote:
> On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> 
> [..]
> > To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
> > kexec-tools [2], which have not yet been merged upstream, are needed.
> > 
> > This series does not include some re-work of the spin-table CPU enable method
> > that is needed to support it,
> 
> How do I figure out if my system has spin table enable method or psci
> enable method. Can one change it. I wanted to test your patches.

The enable method is a function the firmware/bootloader provides.
Multiple methods may be supported.  The boot-wrapper-aarch64
build defaults to spin-table, but has the configure option --enable-psci.

For a running system you can check the device tree:

  cat /proc/device-tree/cpus/cpu\@0/enable-method | hexdump -C

Or the dmsg:

  dmesg | egrep -i 'psci'

Or use the --debug option to kexec:

  kexec --debug --load /boot/vmlinux.strip ...

read_cpu_info:398: cpu-0 (/cpus/cpu at 0): hwid-0, 'psci'

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH V2 6/7] arm64/kexec: Add core kexec support
  2014-09-25  0:23 ` [PATCH 6/7] arm64/kexec: Add core kexec support Geoff Levand
  2014-09-25 18:28   ` Vivek Goyal
  2014-09-30 18:18   ` Vivek Goyal
@ 2014-09-30 23:59   ` Geoff Levand
  2 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-09-30 23:59 UTC (permalink / raw)
  To: linux-arm-kernel

Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
arm64 architecture that add support for the kexec re-boot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
V2: 
  o Add comment about soft_restart().
  o Fold kexec_list_walk() and flush callback into kexec_list_flush().
  o Add arm64_ prefix to global variables.

 arch/arm64/Kconfig                  |   9 ++
 arch/arm64/include/asm/kexec.h      |  47 +++++++++
 arch/arm64/kernel/Makefile          |   1 +
 arch/arm64/kernel/machine_kexec.c   | 171 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 184 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 413 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f0d3a2d..af03449 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -313,6 +313,15 @@ config ARCH_HAS_CACHE_LINE_SIZE
 
 source "mm/Kconfig"
 
+config KEXEC
+	depends on (!SMP || PM_SLEEP_SMP)
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..e7bd7ab
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,47 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_ARM64_KEXEC_H)
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_ARM64
+
+#if !defined(__ASSEMBLY__)
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6e9538c..77a7351 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
 arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..8924bb0
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,171 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
+
+/* Global variables for the relocate_kernel routine. */
+
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern unsigned long arm64_kexec_dtb_addr;
+extern unsigned long arm64_kexec_kimage_head;
+extern unsigned long arm64_kexec_kimage_start;
+
+/**
+ * kexec_is_dtb - Helper routine to check the device tree header signature.
+ */
+
+static bool kexec_is_dtb(const void *dtb)
+{
+	__be32 magic;
+
+	return get_user(magic, (__be32 *)dtb) ? false :
+		(be32_to_cpu(magic) == OF_DT_HEADER);
+}
+
+/**
+ * kexec_find_dtb_seg - Helper routine to find the dtb segment.
+ */
+
+static const struct kexec_segment *kexec_find_dtb_seg(
+	const struct kimage *image)
+{
+	int i;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		if (kexec_is_dtb(image->segment[i].buf))
+			return &image->segment[i];
+	}
+
+	return NULL;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	const struct kexec_segment *dtb_seg;
+
+	dtb_seg = kexec_find_dtb_seg(image);
+
+	if (!dtb_seg)
+		return -EINVAL;
+
+	arm64_kexec_dtb_addr = dtb_seg->mem;
+	arm64_kexec_kimage_start = image->start;
+
+	return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list to PoC.
+ */
+
+static void kexec_list_flush(unsigned long kimage_head)
+{
+	void *dest;
+	unsigned long *entry;
+
+	for (entry = &kimage_head, dest = NULL; ; entry++) {
+		unsigned int flag = *entry & 
+			(IND_DESTINATION | IND_INDIRECTION | IND_DONE |
+			IND_SOURCE);
+		void *addr = phys_to_virt(*entry & PAGE_MASK);
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (unsigned long *)addr - 1;
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DESTINATION:
+			dest = addr;
+			break;
+		case IND_SOURCE:
+			__flush_dcache_area(addr, PAGE_SIZE);
+			dest += PAGE_SIZE;
+			break;
+		case IND_DONE:
+			return;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+
+void machine_kexec(struct kimage *image)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	BUG_ON(num_online_cpus() > 1);
+
+	arm64_kexec_kimage_head = image->head;
+
+	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
+	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+	/*
+	 * Copy relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+		relocate_new_kernel_size);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+
+	__flush_dcache_area(reboot_code_buffer, relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+
+	kexec_list_flush(image->head);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	/*
+	 * soft_restart() will shutdown the MMU, disable data caches, then
+	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * the relocate_new_kernel routine.  relocate_new_kernel will use
+	 * physical addressing to relocate the new kernel to its final position
+	 * and then will transfer control to the entry point of the new kernel.
+	 */
+
+	soft_restart(reboot_code_buffer_phys);
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..ac68322
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,184 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/proc-macros.S>
+
+/* The list entry flags. */
+
+#define IND_DESTINATION_BIT 0
+#define IND_INDIRECTION_BIT 1
+#define IND_DONE_BIT        2
+#define IND_SOURCE_BIT      3
+
+/*
+ * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new kernel to its final location.  To assure that the relocate_new_kernel
+ * routine which does that copy is not overwritten all code and data needed
+ * by relocate_new_kernel must be between the symbols relocate_new_kernel and
+ * relocate_new_kernel_end.  The machine_kexec() routine will copy
+ * relocate_new_kernel to the kexec control_code_page, a special page which
+ * has been set up to be preserved during the kernel copy operation.
+ */
+
+.globl relocate_new_kernel
+relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+
+	ldr	x18, arm64_kexec_kimage_head	/* x18 = list entry */
+	dcache_line_size x17, x0		/* x17 = dcache line size */
+	mov	x16, xzr			/* x16 = segment start */
+	mov	x15, xzr			/* x15 = entry ptr */
+	mov	x14, xzr			/* x14 = copy dest */
+
+	/* Check if the new kernel needs relocation. */
+
+	cbz	x18, .Ldone
+	tbnz	x18, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x13, x18, PAGE_MASK		/* x13 = addr */
+
+	/* Test the entry flags. */
+
+.Ltest_source:
+	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* copy_page(x20 = dest, x21 = src) */
+
+	mov x20, x14
+	mov x21, x13
+
+1:	ldp	x22, x23, [x21]
+	ldp	x24, x25, [x21, #16]
+	ldp	x26, x27, [x21, #32]
+	ldp	x28, x29, [x21, #48]
+	add	x21, x21, #64
+	stnp	x22, x23, [x20]
+	stnp	x24, x25, [x20, #16]
+	stnp	x26, x27, [x20, #32]
+	stnp	x28, x29, [x20, #48]
+	add	x20, x20, #64
+	tst	x21, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	/* dest += PAGE_SIZE */
+
+	add	x14, x14, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+
+	mov	x15, x13
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x18, IND_DESTINATION_BIT, .Lnext
+
+	/* flush segment */
+
+	bl	.Lflush
+	mov	x16, x13
+
+	/* dest = addr */
+
+	mov	x14, x13
+
+.Lnext:
+	/* entry = *ptr++ */
+
+	ldr	x18, [x15], #8
+
+	/* while (!(entry & DONE)) */
+
+	tbz	x18, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	/* flush last segment */
+
+	bl	.Lflush
+
+	dsb	sy
+	isb
+	ic	ialluis
+	dsb	sy
+	isb
+
+	/* start_new_kernel */
+
+	ldr	x4, arm64_kexec_kimage_start
+	ldr	x0, arm64_kexec_dtb_addr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x4
+
+/* flush - x17 = line size, x16 = start addr, x14 = end addr. */
+
+.Lflush:
+	cbz	x16, 2f
+	mov	x0, x16
+	sub	x1, x17, #1
+	bic	x0, x0, x1
+1:	dc	civac, x0
+	add	x0, x0, x17
+	cmp	x0, x14
+	b.lo	1b
+2:	ret
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+/* The machine_kexec routines set these variables. */
+
+/*
+ * arm64_kexec_dtb_addr - Physical address of the new kernel's device tree.
+ */
+
+.globl arm64_kexec_dtb_addr
+arm64_kexec_dtb_addr:
+	.quad	0x0
+
+/*
+ * arm64_kexec_kimage_head - Copy of image->head, the list of kimage entries.
+ */
+
+.globl arm64_kexec_kimage_head
+arm64_kexec_kimage_head:
+	.quad	0x0
+
+/*
+ * arm64_kexec_kimage_start - Copy of image->start, the entry point of the new
+ * kernel.
+ */
+
+.globl arm64_kexec_kimage_start
+arm64_kexec_kimage_start:
+	.quad	0x0
+
+.Lrelocate_new_kernel_end:
+
+/*
+ * relocate_new_kernel_size - Number of bytes to copy to the control_code_page.
+ */
+
+.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.quad .Lrelocate_new_kernel_end - relocate_new_kernel
+
+.org	KEXEC_CONTROL_PAGE_SIZE
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 6925f5b..04626b9 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_ARM64   (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-30 19:54     ` Geoff Levand
@ 2014-10-01 14:56       ` Vivek Goyal
  2014-10-03 18:35         ` Geoff Levand
  2014-10-01 16:16       ` Mark Rutland
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 14:56 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 30, 2014 at 12:54:37PM -0700, Geoff Levand wrote:

[..]
> > > +{
> > > +	switch (flag) {
> > > +	case IND_INDIRECTION:
> > > +	case IND_SOURCE:
> > > +		__flush_dcache_area(addr, PAGE_SIZE);
> > > +		break;
> > 
> > So what does __flush_dcache_area() do? Flush data caches. IIUC, addr
> > is virtual address at this point of time. While copying pages and
> > walking through the list, I am assuming you have switched off page
> > tables and you are in some kind of 1:1 physical mode. So how did
> > flushing data caches related to a virtual address help. I guess we
> > are not even accessing that virtual address now.
> 
> __flush_dcache_area(), and the underling aarch64 civac instruction
> operate on virtual addresses.  Here we are still running with the
> MMU on and the identity mapping has not yet been enabled.  This is
> the sequence:
> 
>   flush dcache -> turn off MMU, dcache -> access memory (PoC) directly 

Sorry, I don't understand that why do we need to flush dcache for source
and indirection page addresses. Some information here will help.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (7 preceding siblings ...)
  2014-09-30 20:29 ` [PATCH 0/7] arm64 kexec kernel patches V3 Vivek Goyal
@ 2014-10-01 15:19 ` Vivek Goyal
  2014-10-03 21:16   ` Geoff Levand
  2014-10-07 18:09 ` Vivek Goyal
  2014-10-09  9:22 ` Will Deacon
  10 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 15:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> Hi All,
> 
> This series adds the core support for kexec re-boots on arm64.  I have tested
> with the ARM VE fast model using various kernel config options for both the
> first and second stage kernels.

Hi Geoff,

Does this patch series work with kexec on UEFI machines?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-09-30 19:54     ` Geoff Levand
  2014-10-01 14:56       ` Vivek Goyal
@ 2014-10-01 16:16       ` Mark Rutland
  2014-10-01 17:36         ` Vivek Goyal
  2014-10-01 17:47         ` Vivek Goyal
  1 sibling, 2 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-01 16:16 UTC (permalink / raw)
  To: linux-arm-kernel

[...]

> > > +/**
> > > + * kexec_is_dtb - Helper routine to check the device tree header signature.
> > > + */
> > > +
> > > +static bool kexec_is_dtb(const void *dtb)
> > > +{
> > > +	__be32 magic;
> > > +
> > > +	return get_user(magic, (__be32 *)dtb) ? false :
> > > +		(be32_to_cpu(magic) == OF_DT_HEADER);
> > > +}
> > > +
> > > +/**
> > > + * kexec_find_dtb_seg - Helper routine to find the dtb segment.
> > > + */
> > > +
> > > +static const struct kexec_segment *kexec_find_dtb_seg(
> > > +	const struct kimage *image)
> > > +{
> > > +	int i;
> > > +
> > > +	for (i = 0; i < image->nr_segments; i++) {
> > > +		if (kexec_is_dtb(image->segment[i].buf))
> > > +			return &image->segment[i];
> > > +	}
> > > +
> > > +	return NULL;
> > > +}
> > 
> > So this implementation makes passing dtb mandatory. So it will not work
> > with ACPI?
> 
> I have not yet considered ACPI.  It will most likely need to have
> something done differently.  Secure boot will also need something
> different, and I expect it will use your new kexec_file_load().

A DTB is mandatory for arm64, and is used to pass the command line,
(optionally) initrd, and other parameters, even if it doesn't contain HW
description. In the EFI case the EFI stub will create a trivial DTB if
necessary, and the kernel will detect any ACPI tables via UEFI, so the
DTB should be sufficient for ACPI.

I'm still rather unhappy about the mechanism by which the DTB is passed
by userspace and detected by the kernel, as I'd prefer that the user
explictly stated which segment they wanted to pass to the (Linux)
kernel, but that would require reworking the kexec syscall to allow
per-segment info/flags.

To me it seems that for all the talk of kexec allowing arbitrary kernels
to be booted it's really just a linux->linux reboot bridge. Does anyone
use kexec to boot something that isn't Linux?

> > Where is dtb present? How is it passed to first kernel? Can it still
> > be around in memory and second kernel can access it?
> 
> The user space program (kexec-tools, etc.) passes a dtb.  That dtb
> could be a copy of the currently one, or a new one specified by
> the user.
> 
> > I mean in ACPI world on x86, all the ACPI info is still present and second
> > kernel can access it without it being explicitly passed to second kernel in
> > memory. Can something similar happen for dtb?

Any ACPI tables should remain, given they'll be reserved in the UEFI
memory map. The second kernel can find them as the first kernel did, via
UEFI tables, which it will find via the DTB.

For the DTB, reusing the original DTB is a possibility. From what I
recall, Grant seemed to prefer re-packing the existing tree as this
would allow for state destroyed at boot to be corrected for.

Regardless, being able to pass a DTB from userspace is a useful option
(especially for the Linux-as-a-bootloader approach that's been mentioned
a lot). That doesn't work for the secureboot case without a new syscall
as we can't pass a signed DTB (or any other additional objects other
than an initrd) to kexec_file_load, but disallowing the user to pass a
new DTB in that case seems reasonable.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 16:16       ` Mark Rutland
@ 2014-10-01 17:36         ` Vivek Goyal
  2014-10-01 17:56           ` Mark Rutland
  2014-10-01 17:47         ` Vivek Goyal
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 17:36 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 05:16:21PM +0100, Mark Rutland wrote:

[..]
> > > So this implementation makes passing dtb mandatory. So it will not work
> > > with ACPI?
> > 
> > I have not yet considered ACPI.  It will most likely need to have
> > something done differently.  Secure boot will also need something
> > different, and I expect it will use your new kexec_file_load().
> 
> A DTB is mandatory for arm64, and is used to pass the command line,
> (optionally) initrd, and other parameters, even if it doesn't contain HW
> description. In the EFI case the EFI stub will create a trivial DTB if
> necessary, and the kernel will detect any ACPI tables via UEFI, so the
> DTB should be sufficient for ACPI.
> 
> I'm still rather unhappy about the mechanism by which the DTB is passed
> by userspace and detected by the kernel, as I'd prefer that the user
> explictly stated which segment they wanted to pass to the (Linux)
> kernel, but that would require reworking the kexec syscall to allow
> per-segment info/flags.

Yep, in this case, it would have been nice if there were per segment
flags to identify type of segment. But unfortunately we don't have. So
in the absence of that, I think putting 4 bytes as dtb magic in the
beginning of segment should work (though not ideal).

> 
> To me it seems that for all the talk of kexec allowing arbitrary kernels
> to be booted it's really just a linux->linux reboot bridge. Does anyone
> use kexec to boot something that isn't Linux?

> 
> > > Where is dtb present? How is it passed to first kernel? Can it still
> > > be around in memory and second kernel can access it?
> > 
> > The user space program (kexec-tools, etc.) passes a dtb.  That dtb
> > could be a copy of the currently one, or a new one specified by
> > the user.
> > 
> > > I mean in ACPI world on x86, all the ACPI info is still present and second
> > > kernel can access it without it being explicitly to second kernel in
> > > memory. Can something similar happen for dtb?
> 
> Any ACPI tables should remain, given they'll be reserved in the UEFI
> memory map. The second kernel can find them as the first kernel did, via
> UEFI tables, which it will fine via the DTB.
> 
> For the DTB, reusing the original DTB is a possibility. From what I
> recall, Grant seemed to prefer re-packing the existing tree as this
> would allow for state destroyed at boot to be corrected for.
> 
> Regardless, being able to pass a DTB from userspace is a useful option
> (especially for the Linux-as-a-bootloader approach that's been mentioned
> a lot). That doesn't work for the secureboot case without a new syscall
> as we can't pass a signed DTB (or any other additional objects other
> than an initrd) to kexec_file_load, but disallowing the user to pass a
> new DTB in that case seems reasonable.

Yes, kexec_file_load() will not allow passing anything except, kernel,
initrd and command line. So syscall implementation will have to resuse
the existing DTB and pass it to second kernel. 

If there are concerns w.r.t state of DTB which can be destroyed during
boot, I guess we will have to store a copy of DTB somewhere early during
boot and kexec can access that original copy during kernel load time.

Thanks
Vivek



> 
> Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 16:16       ` Mark Rutland
  2014-10-01 17:36         ` Vivek Goyal
@ 2014-10-01 17:47         ` Vivek Goyal
  2014-10-01 18:03           ` Mark Rutland
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 17:47 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 05:16:21PM +0100, Mark Rutland wrote:

[..]
> I'm still rather unhappy about the mechanism by which the DTB is passed
> by userspace and detected by the kernel, as I'd prefer that the user
> explictly stated which segment they wanted to pass to the (Linux)
> kernel, but that would require reworking the kexec syscall to allow
> per-segment info/flags.

Why does the running kernel need to know about dtb segment.  I see following.

ldr     x0, kexec_dtb_addr

IIUC, we are loading this address in x0. Can't we do something similar
in user space with purgatory. I mean first jump to purgatory (code
compiled in user space but runs prviliged) and that code takes care
of loading x0 with right dtb addr and then jump to final kernel.

IOW, I am not able to understand that why kernel implementation needs
to know which is dtb segment.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 17:36         ` Vivek Goyal
@ 2014-10-01 17:56           ` Mark Rutland
  0 siblings, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-01 17:56 UTC (permalink / raw)
  To: linux-arm-kernel

> > I'm still rather unhappy about the mechanism by which the DTB is passed
> > by userspace and detected by the kernel, as I'd prefer that the user
> > explictly stated which segment they wanted to pass to the (Linux)
> > kernel, but that would require reworking the kexec syscall to allow
> > per-segment info/flags.
> 
> Yep, in this case, it would have been nice if there were per segment
> flags to identify type of segment. But unfortunately we don't have. So
> in the absence of that, I think putting 4 bytes as dtb magic in the
> beginning of segment should work (though no ideal).

I don't disagree it will work for the simple kernel + DTB case. The
existing DTB magic  (which is part of the DTB rather than an addition)
is sufficient to identify _a_ DTB, but that doesn't mean that you want
to pass that DTB to the next kernel, nor that you want it in x0.

That might be important for booting other OSs, or for loading multiple
kernels (if you want linux as a bootloader to load a kernel +
crashkernel pair with separate DTBs).

I believe it would be feasible to (at some point) add a new kexec flag
allowing us to pass a list of (new) struct kexec_segment_extended
elements to address that.

[...]

> > For the DTB, reusing the original DTB is a possibility. From what I
> > recall, Grant seemed to prefer re-packing the existing tree as this
> > would allow for state destroyed at boot to be corrected for.
> > 
> > Regardless, being able to pass a DTB from userspace is a useful option
> > (especially for the Linux-as-a-bootloader approach that's been mentioned
> > a lot). That doesn't work for the secureboot case without a new syscall
> > as we can't pass a signed DTB (or any other additional objects other
> > than an initrd) to kexec_file_load, but disallowing the user to pass a
> > new DTB in that case seems reasonable.
> 
> Yes, kexec_file_load() will not allow passing anything except, kernel,
> initrd and command line. So syscall implementation will have to resuse
> the existing DTB and pass it to second kernel. 
> 
> If there are concerns w.r.t state of DTB which can be destroyed during
> boot, I guess we will have to store a copy of DTB somewhere early during
> boot and kexec can access that original copy during kernel load time.

The issue with state wasn't that the DTB binary gets modified but
rather that the state of the system is changed such that the state
described in the original DTB is no longer correct. Consider a simple
framebuffer setup maintained from the bootloader that the kernel later
decides to modify at the behest of the user.

As I mention above, I believe that Grant was of the opinion that the
live/unpacked device tree should be modified to reflect the system
state, and should be repacked prior to a kexec.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 17:47         ` Vivek Goyal
@ 2014-10-01 18:03           ` Mark Rutland
  2014-10-01 18:09             ` Vivek Goyal
  2014-10-01 19:22             ` Vivek Goyal
  0 siblings, 2 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-01 18:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 06:47:14PM +0100, Vivek Goyal wrote:
> On Wed, Oct 01, 2014 at 05:16:21PM +0100, Mark Rutland wrote:
> 
> [..]
> > I'm still rather unhappy about the mechanism by which the DTB is passed
> > by userspace and detected by the kernel, as I'd prefer that the user
> > explictly stated which segment they wanted to pass to the (Linux)
> > kernel, but that would require reworking the kexec syscall to allow
> > per-segment info/flags.
> 
> Why does the running kernel need to know about dtb segment.  I see following.
> 
> ldr     x0, kexec_dtb_addr
> 
> IIUC, we are loading this address in x0. Can't we do something similar
> in user space with purgatory. I mean first jump to purgatory (code
> compiled in user space but runs prviliged) and that code takes care
> of loading x0 with right dtb addr and then jump to final kernel.

I believe the fundamental issue here is a lack of a userspace-provided
purgatory.

I agree that userspace purgatory code could set this up. That would
address my concerns w.r.t. detecting the DTB kernel-side, as there would
be no need. It would also address my concerns with booting OSs other
than Linux, as the purgatory code could do whatever was appropriate for
whatever OS image was loaded.

So in my view, a userspace-provided purgatory that set up the state the
next kernel expected would be preferable. That could be as simple as
setting up the registers and branching -- I assume we'd have the first
kernel perform the required cache maintenance.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 18:03           ` Mark Rutland
@ 2014-10-01 18:09             ` Vivek Goyal
  2014-10-01 18:19               ` Mark Rutland
  2014-10-01 19:22             ` Vivek Goyal
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 07:03:04PM +0100, Mark Rutland wrote:
> On Wed, Oct 01, 2014 at 06:47:14PM +0100, Vivek Goyal wrote:
> > On Wed, Oct 01, 2014 at 05:16:21PM +0100, Mark Rutland wrote:
> > 
> > [..]
> > > I'm still rather unhappy about the mechanism by which the DTB is passed
> > > by userspace and detected by the kernel, as I'd prefer that the user
> > > explictly stated which segment they wanted to pass to the (Linux)
> > > kernel, but that would require reworking the kexec syscall to allow
> > > per-segment info/flags.
> > 
> > Why does the running kernel need to know about dtb segment.  I see following.
> > 
> > ldr     x0, kexec_dtb_addr
> > 
> > IIUC, we are loading this address in x0. Can't we do something similar
> > in user space with purgatory. I mean first jump to purgatory (code
> > compiled in user space but runs prviliged) and that code takes care
> > of loading x0 with right dtb addr and then jump to final kernel.
> 
> I believe the fundamental issue here is a lack of a userspace-provided
> purgatory.
> 
> I agree that userspace purgatory code could set this up. That would
> address my concerns w.r.t. detecting the DTB kernel-side, as there would
> be no need. It would also address my concerns with booting OSs other
> than Linux, as the purgatory code could do whatever was appropriate for
> whatever OS image was loaded.
> 
> So in my view, a userspace-provided purgatory that set up the state the
> next kernel expected would be preferable. That could be as simple as
> setting up the registers and branching -- I assume we'd have the first
> kernel perform the required cache maintenance.

Apart from setting various registers, we also verify the sha256 checksums
of loaded segments in purgatory to make sure segments are not corrupted.
On x86, we also take care of backing up first 640KB of memory in reserved
area in kdump case. 

So other arches are already doing all this in purgatory. It would be nice
if arm64 sticks to that convention too.

First kernel --> purgatory ----> second kernel.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 18:09             ` Vivek Goyal
@ 2014-10-01 18:19               ` Mark Rutland
  2014-10-01 18:31                 ` Vivek Goyal
  0 siblings, 1 reply; 65+ messages in thread
From: Mark Rutland @ 2014-10-01 18:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 07:09:09PM +0100, Vivek Goyal wrote:
> On Wed, Oct 01, 2014 at 07:03:04PM +0100, Mark Rutland wrote:
> > On Wed, Oct 01, 2014 at 06:47:14PM +0100, Vivek Goyal wrote:
> > > On Wed, Oct 01, 2014 at 05:16:21PM +0100, Mark Rutland wrote:
> > > 
> > > [..]
> > > > I'm still rather unhappy about the mechanism by which the DTB is passed
> > > > by userspace and detected by the kernel, as I'd prefer that the user
> > > > explicitly stated which segment they wanted to pass to the (Linux)
> > > > kernel, but that would require reworking the kexec syscall to allow
> > > > per-segment info/flags.
> > > 
> > > Why does the running kernel need to know about dtb segment.  I see following.
> > > 
> > > ldr     x0, kexec_dtb_addr
> > > 
> > > IIUC, we are loading this address in x0. Can't we do something similar
> > > in user space with purgatory. I mean first jump to purgatory (code
> > > compiled in user space but runs privileged) and that code takes care
> > > of loading x0 with right dtb addr and then jump to final kernel.
> > 
> > I believe the fundamental issue here is a lack of a userspace-provided
> > purgatory.
> > 
> > I agree that userspace purgatory code could set this up. That would
> > address my concerns w.r.t. detecting the DTB kernel-side, as there would
> > be no need. It would also address my concerns with booting OSs other
> > than Linux, as the purgatory code could do whatever was appropriate for
> > whatever OS image was loaded.
> > 
> > So in my view, a userspace-provided purgatory that set up the state the
> > next kernel expected would be preferable. That could be as simple as
> > setting up the registers and branching -- I assume we'd have the first
> > kernel perform the required cache maintenance.
> 
> Apart from setting various registers, we also verify the sha256 checksums
> of loaded segments in purgatory to make sure segments are not corrupted.
> On x86, we also take care of backing up first 640KB of memory in reserved
> area in kdump case. 

I was under the (possibly mistaken) impression that for kdump the second
kernel lived and ran at a high address so as to preserve memory in use
by the first kernel. Is the first 640KiB special on x86, or does
it have some kdump-specific use?

> So other arches are already doing all this in purgatory. It would be nice
> if arm64 sticks to that convention too.
> 
> First kernel --> purgatory ----> second kernel.

That would also be my preference, especially given the flexibility this
would leave.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 18:19               ` Mark Rutland
@ 2014-10-01 18:31                 ` Vivek Goyal
  0 siblings, 0 replies; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 18:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 07:19:59PM +0100, Mark Rutland wrote:
> On Wed, Oct 01, 2014 at 07:09:09PM +0100, Vivek Goyal wrote:
> > On Wed, Oct 01, 2014 at 07:03:04PM +0100, Mark Rutland wrote:
> > > On Wed, Oct 01, 2014 at 06:47:14PM +0100, Vivek Goyal wrote:
> > > > On Wed, Oct 01, 2014 at 05:16:21PM +0100, Mark Rutland wrote:
> > > > 
> > > > [..]
> > > > > I'm still rather unhappy about the mechanism by which the DTB is passed
> > > > > by userspace and detected by the kernel, as I'd prefer that the user
> > > > > explictly stated which segment they wanted to pass to the (Linux)
> > > > > kernel, but that would require reworking the kexec syscall to allow
> > > > > per-segment info/flags.
> > > > 
> > > > Why does the running kernel need to know about dtb segment.  I see following.
> > > > 
> > > > ldr     x0, kexec_dtb_addr
> > > > 
> > > > IIUC, we are loading this address in x0. Can't we do something similar
> > > > in user space with purgatory. I mean first jump to purgatory (code
> > > > compiled in user space but runs privileged) and that code takes care
> > > > of loading x0 with right dtb addr and then jump to final kernel.
> > > 
> > > I believe the fundamental issue here is a lack of a userspace-provided
> > > purgatory.
> > > 
> > > I agree that userspace purgatory code could set this up. That would
> > > address my concerns w.r.t. detecting the DTB kernel-side, as there would
> > > be no need. It would also address my concerns with booting OSs other
> > > than Linux, as the purgatory code could do whatever was appropriate for
> > > whatever OS image was loaded.
> > > 
> > > So in my view, a userspace-provided purgatory that set up the state the
> > > next kernel expected would be preferable. That could be as simple as
> > > setting up the registers and branching -- I assume we'd have the first
> > > kernel perform the required cache maintenance.
> > 
> > Apart from setting various registers, we also verify the sha256 checksums
> > of loaded segments in purgatory to make sure segments are not corrupted.
> > On x86, we also take care of backing up first 640KB of memory in reserved
> > area in kdump case. 
> 
> I was under the (possibly mistaken) impression that for kdump the second
> kernel lived and ran at a high address so as to preserve memory in use
> by the first kernel. Is the first 640KiB special on x86, or does
> it have some kdump-specific use?

Use of first 640KB by second kernel is x86 specific. And it was long back
and I am not sure if this requirement exists today or not. Just that
things have been working and nobody has bothered to look into optimizing
it further.

Kdump kernel does run from reserved memory. This memory is reserved
very early during boot so that first kernel does not end up using it.
So it does not matter whether that memory is reserved high or low. First
kernel is not going to use it as it is reserved. Hence memory contents
of first kernel will be preserved.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 18:03           ` Mark Rutland
  2014-10-01 18:09             ` Vivek Goyal
@ 2014-10-01 19:22             ` Vivek Goyal
  2014-10-02 10:26               ` Mark Rutland
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-01 19:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 07:03:04PM +0100, Mark Rutland wrote:

[..]
> I assume we'd have the first kernel perform the required cache maintenance.
> 

Hi Mark,

I am wondering, what kind of cache management is required here? What kind of
dcaches are present on arm64. I see that Geoff's patches flush dcaches for 
certain kexec stored pages using __flush_dcache_area()
(in kexec_list_flush_cb()).

arch/arm64/include/asm/cacheflush.h says following.

 *      __flush_dcache_area(kaddr, size)
 *
 *              Ensure that the data held in page is written back.
 *              - kaddr  - page address
 *              - size   - region size

So looks like we are trying to write back anything which we will access
after switching off MMU. If that's the case, I have two questions.

- Why do we need to writeback that cacheline. After switching off MMU,
  will we not access same cacheline. I thought caches are VIPT and tag
  will still remain the same (but I might easily be wrong here).

- Even if we have to flush that cacheline, for kexec pages, I guess it
  should be done at kernel load time and not at the time of transition
  into new kernel. That seems too late. Once the kernel has been loaded,
  we don't overwrite these pages anymore. So a dcache flush at that
  time should be good.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 19:22             ` Vivek Goyal
@ 2014-10-02 10:26               ` Mark Rutland
  2014-10-02 13:54                 ` Vivek Goyal
  0 siblings, 1 reply; 65+ messages in thread
From: Mark Rutland @ 2014-10-02 10:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 01, 2014 at 08:22:45PM +0100, Vivek Goyal wrote:
> On Wed, Oct 01, 2014 at 07:03:04PM +0100, Mark Rutland wrote:
> 
> [..]
> > I assume we'd have the first kernel perform the required cache maintenance.
> > 
> 
> Hi Mark,
> 
> I am wondering, what kind of cache management is required here? What kind of
> dcaches are present on arm64.

In ARMv8 there's a hierarchy of quasi-PIPT D-caches; they generally
behave like (and can be maintained as if) they are PIPT but might not
actually be PIPT. There may be a system level cache between the
architected cache hierarchy and memory (that should respect cache
maintenance by VA).

The MT_NORMAL attributes are such that most memory the kernel maps will
have write-back read/write allocate attributes. So cache maintenance is
required to ensure that data is cleaned from the D-caches out to the PoC
(the point in the memory system at which non-cacheable accesses can see
the same data), such that the CPU can see the images rather than stale
data once translation is disabled.

> I see that Geoff's patches flush dcaches for 
> certain kexec stored pages using __flush_dcache_area()
> (in kexec_list_flush_cb()).
> 
> arch/arm64/include/asm/cacheflush.h says following.
> 
>  *      __flush_dcache_area(kaddr, size)
>  *
>  *              Ensure that the data held in page is written back.
>  *              - kaddr  - page address
>  *              - size   - region size
> 
> So looks like we are trying to write back anything which we will access
> after switching off MMU. If that's the case, I have two questions.
> 
> - Why do we need to writeback that cacheline. After switching off MMU,
>   will we not access same cacheline. I thought caches are VIPT and tag
>   will still remain the same (but I might easily be wrong here).

As I mention above, the initial cache flush by VA is to ensure that the
data is visible to the CPU once translation is disabled. I'm not sure I
follow your reasoning.

> - Even if we have to flush that cacheline, for kexec pages, I guess it
>   should be done at kernel load time and not at the time of transition
>   into new kernel. That seems too late. Once the kernel has been loaded,
>   we don't overwrite these pages anymore. So a dcache flush at that
>   time should be good.

Given the current assumption at boot is that the kernel image should be
clean in the D-cache hierarchy (but not necessarily anything else), that
should be fine. However, we may need to nuke the I-cache when branching
to the purgatory code as the I-cache could be PIPT, VIPT, or ASID-tagged
VIVT.

If the purgatory code moves anything around it will need to perform
maintenance by VA to ensure stale dirty lines don't overwrite anything,
and stale clean lines don't shadow anything.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-02 10:26               ` Mark Rutland
@ 2014-10-02 13:54                 ` Vivek Goyal
  2014-10-02 16:53                   ` Mark Rutland
  0 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-02 13:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Oct 02, 2014 at 11:26:25AM +0100, Mark Rutland wrote:
> On Wed, Oct 01, 2014 at 08:22:45PM +0100, Vivek Goyal wrote:
> > On Wed, Oct 01, 2014 at 07:03:04PM +0100, Mark Rutland wrote:
> > 
> > [..]
> > > I assume we'd have the first kernel perform the required cache maintenance.
> > > 
> > 
> > Hi Mark,
> > 
> > I am wondering, what kind of cache management is required here? What kind of
> > dcaches are present on arm64.
> 
> In ARMv8 there's a hierarchy of quasi-PIPT D-caches; they generally
> behave like (and can be maintained as if) they are PIPT but might not
> actually be PIPT. There may be a system level cache between the
> architected cache hierarchy and memory (that should respect cache
> maintenance by VA).
> 
> The MT_NORMAL attributes are such that most memory the kernel maps will
> have write-back read/write allocate attributes. So cache maintenance is
> required to ensure that data is cleaned from the D-caches out to the PoC
> (the point in the memory system at which non-cacheable accesses can see
> the same data), such that the CPU can see the images rather than stale
> data once translation is disabled.
> 
> > I see that Geoff's patches flush dcaches for 
> > certain kexec stored pages using __flush_dcache_area()
> > (in kexec_list_flush_cb()).
> > 
> > arch/arm64/include/asm/cacheflush.h says following.
> > 
> >  *      __flush_dcache_area(kaddr, size)
> >  *
> >  *              Ensure that the data held in page is written back.
> >  *              - kaddr  - page address
> >  *              - size   - region size
> > 
> > So looks like we are trying to write back anything which we will access
> > after switching off MMU. If that's the case, I have two questions.
> > 
> > - Why do we need to writeback that cacheline. After switching off MMU,
> >   will we not access same cacheline. I thought caches are VIPT and tag
> >   will still remain the same (but I might easily be wrong here).
> 
> As I mention above, the initial cache flush by VA is to ensure that the
> data is visible to the CPU once translation is disabled. I'm not sure I
> follow your reasoning.

I was assuming that even after we disable translations, cpu will still
read data from dcache if it is available there. Looks like you are
saying that once translation is disabled, data will be read from memory
hence it is important to flush out dcache before disabling translation.
Did I understand it right?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-02 13:54                 ` Vivek Goyal
@ 2014-10-02 16:53                   ` Mark Rutland
  0 siblings, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-02 16:53 UTC (permalink / raw)
  To: linux-arm-kernel

> > > I see that Geoff's patches flush dcaches for 
> > > certain kexec stored pages using __flush_dcache_area()
> > > (in kexec_list_flush_cb()).
> > > 
> > > arch/arm64/include/asm/cacheflush.h says following.
> > > 
> > >  *      __flush_dcache_area(kaddr, size)
> > >  *
> > >  *              Ensure that the data held in page is written back.
> > >  *              - kaddr  - page address
> > >  *              - size   - region size
> > > 
> > > So looks like we are trying to write back anything which we will access
> > > after switching off MMU. If that's the case, I have two questions.
> > > 
> > > - Why do we need to writeback that cacheline. After switching off MMU,
> > >   will we not access same cacheline. I thought caches are VIPT and tag
> > >   will still remain the same (but I might easily be wrong here).
> > 
> > As I mention above, the initial cache flush by VA is to ensure that the
> > data is visible to the CPU once translation is disabled. I'm not sure I
> > follow your reasoning.
> 
> I was assuming that even after we disable translations, cpu will still
> read data from dcache if it is available there. Looks like you are
> saying that once translation is disabled, data will be read from memory
> hence it is important to flush out dcache before disabling translation.
> Did I understand it right?

I believe you did.

When translation is disabled (i.e. SCTLR_ELx.M == 0), data accesses are
assigned Device-nGnRnE attributes regardless of whether the caches are
enabled (i.e. SCTLR_ELx.C == 1), and bypass the cache hierarchy. So
accesses to memory will go straight to PoC (essentially memory), and
won't hit in any cache.

However, instruction accesses are more complicated. They are always
assigned Normal memory attributes, and if the I-caches are enabled (i.e.
SCTLR_ELx.I == 1) they are cacheable regardless of whether translation
is enabled. So I-cache maintenance may be required when translation is
disabled.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-09-30 21:27   ` Geoff Levand
@ 2014-10-02 19:08     ` Vivek Goyal
  2014-10-02 22:59       ` Geoff Levand
  0 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-02 19:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 30, 2014 at 02:27:56PM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Tue, 2014-09-30 at 16:29 -0400, Vivek Goyal wrote:
> > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > 
> > [..]
> > > To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
> > > kexec-tools [2], which have not yet been merged upstream, are needed.
> > > 
> > > This series does not include some re-work of the spin-table CPU enable method
> > > that is needed to support it,
> > 
> > How do I figure out if my system has spin table enable method or psci
> > enable method. Can one change it. I wanted to test your patches.
> 
> The enable method is a function the firmware/bootloader provides.
> Multiple methods may be supported.  The boot-wrapper-aarch64
> build defaults to spin-table, but has the configure option --enable-psci.
> 
> For a running system you can check the device tree:
> 
>   cat /proc/device-tree/cpus/cpu\@0/enable-method | hexdump -C
> 

Hi Geoff,

So system I have supports spin-table method for cpu bringup. How do I 
test your patches with that system. Are there any patches on your
spin-table branch which can make it working?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-02 19:08     ` Vivek Goyal
@ 2014-10-02 22:59       ` Geoff Levand
  2014-10-07 13:43         ` Vivek Goyal
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-10-02 22:59 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Thu, 2014-10-02 at 15:08 -0400, Vivek Goyal wrote:
> On Tue, Sep 30, 2014 at 02:27:56PM -0700, Geoff Levand wrote:
> > For a running system you can check the device tree:
> > 
> >   cat /proc/device-tree/cpus/cpu\@0/enable-method | hexdump -C
> > 
> 
> So system I have supports spin-table method for cpu bringup. How do I 
> test your patches with that system. Are there any patches on your
> spin-table branch which can make it working?

If possible, check if there is a firmware update that supports PSCI.

My spin-table patches are now out of date, and fixing those up is
now low priority.

I modified kexec-tools to only issue a message, but accept a device
tree that does not have the new cpu-return-addr property that is
needed for kexec on spin-table systems.  Since the spin-table stuff
is only for managing secondary CPUs, this change should allow you to
test kexec with a 1st stage kernel built with CONFIG_SMP=n.

Since the secondary CPUs will have never left the spin-table, you
should be able to kexec re-boot into an SMP kernel, but you will
not be able to do a successful kexec re-boot from there.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros
  2014-09-25  0:23 ` [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros Geoff Levand
@ 2014-10-03 10:26   ` Mark Rutland
  2014-10-03 22:27     ` Geoff Levand
  0 siblings, 1 reply; 65+ messages in thread
From: Mark Rutland @ 2014-10-03 10:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 01:23:26AM +0100, Geoff Levand wrote:
> Some of the macros defined in kvm_arm.h are useful in the exception vector
> routines, but they are not compatible with the assembler.  Change the
> definition of ESR_EL2_ISS to be compatible.
> 
> Fixes build errors like these when using kvm_arm.h in assembly
> source files:
> 
>   Error: unexpected characters following instruction at operand 3 -- `and x0,x1,#((1U<<25)-1)'
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/kvm_arm.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index cc83520..fb42ab5 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -175,7 +175,7 @@
>  /* Exception Syndrome Register (ESR) bits */
>  #define ESR_EL2_EC_SHIFT	(26)
>  #define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
> -#define ESR_EL2_IL		(1U << 25)
> +#define ESR_EL2_IL		(UL(1) << 25)

This looks fine for ESR_EL2_IL (and hence ESR_EL2_ISS)...

>  #define ESR_EL2_ISS		(ESR_EL2_IL - 1)
>  #define ESR_EL2_ISV_SHIFT	(24)
>  #define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)

... but it seems there are a few more instances that could blow up if
used in asm.

Would you be able to fix those up at the same time? It doesn't look like there
are too many, and it should be a fairly mechanical change:

[mark@leverpostej:~/src/linux]% grep '[0-9][UL]' -- arch/arm64/include/asm/kvm_arm.h    
#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT  (48LLU)
#define ESR_EL2_IL              (1U << 25)
#define ESR_EL2_ISV             (1U << ESR_EL2_ISV_SHIFT)
#define ESR_EL2_SAS             (3U << ESR_EL2_SAS_SHIFT)
#define ESR_EL2_CV              (1U << ESR_EL2_CV_SHIFT)

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 2/7] arm64: Convert hcalls to use ISS field
  2014-09-25  0:23 ` [PATCH 2/7] arm64: Convert hcalls to use ISS field Geoff Levand
@ 2014-10-03 10:51   ` Mark Rutland
  2014-10-03 21:56     ` Geoff Levand
  0 siblings, 1 reply; 65+ messages in thread
From: Mark Rutland @ 2014-10-03 10:51 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Thu, Sep 25, 2014 at 01:23:26AM +0100, Geoff Levand wrote:
> To allow for additional hcalls to be defined and to make the arm64 hcall API
> more consistent across exception vector routines change the hcall implementations
> to use the ISS field of the ESR_EL2 register to specify the hcall type.
> 
> The existing arm64 hcall implementations are limited in that they only allow
> for two distinct hcalls; with the x0 register either zero, or not zero.  Also,
> the API of the hyp-stub exception vector routines and the KVM exception vector
> routines differ; hyp-stub uses a non-zero value in x0 to implement
> __hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp.
> 
> Define three new preprocessor macros HVC_GET_VECTORS, HVC_SET_VECTORS and
> HVC_CALL_HYP and to be used as hcall type specifiers and convert the
> existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
> to use these new macros when executing an HVC call.  Also change the
> corresponding hyp-stub and KVM el1_sync exception vector routines to use these
> new macros.

I'm definitely in favour of using the HVC immediate.

I just have some comments on the mechanics below.

> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/virt.h | 20 ++++++++++++++++++++
>  arch/arm64/kernel/hyp-stub.S  | 33 +++++++++++++++++++++------------
>  arch/arm64/kvm/hyp.S          | 18 +++++++++++-------
>  3 files changed, 52 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 7a5df52..99c319c 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -21,6 +21,26 @@
>  #define BOOT_CPU_MODE_EL1	(0xe11)
>  #define BOOT_CPU_MODE_EL2	(0xe12)
>  
> +/*
> + * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
> + */
> +
> +#define HVC_GET_VECTORS 1
> +
> +/*
> + * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
> + *
> + * @x0: Physical address of the new vector table.
> + */
> +
> +#define HVC_SET_VECTORS 2
> +
> +/*
> + * HVC_CALL_HYP - Execute a hyp routine.
> + */
> +
> +#define HVC_CALL_HYP 3
> +
>  #ifndef __ASSEMBLY__
>  
>  /*
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index a272f33..8b39522 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -53,14 +53,22 @@ ENDPROC(__hyp_stub_vectors)
>  	.align 11
>  
>  el1_sync:
> -	mrs	x1, esr_el2
> -	lsr	x1, x1, #26
> -	cmp	x1, #0x16
> -	b.ne	2f				// Not an HVC trap
> -	cbz	x0, 1f
> -	msr	vbar_el2, x0			// Set vbar_el2
> +	mrs	x18, esr_el2
> +	lsr	x17, x18, #26			// x17=EC

The first patch enabled kvm_arm.h macros to be used from asm. Can't we
use them here? If we don't include kvm_arm.h in the hyp stub, maybe we
should (ideally we'd factor common stuff out, but I can see that getting
messy fast).

Assuming we can use them:

	lsr	x17, x18, #ESR_EL2_EC_SHIFT

> +	and	x18, x18, #0xffffff		// x18=ISS

	and	x18, x18, ESR_EL2_ISS

> +
> +	cmp     x17, #0x16

	cmp 	x17, #ESR_EL2_EC_HVC64

> +	b.ne    2f				// Not an HVC trap
> +
> +	cmp	x18, #HVC_GET_VECTORS
> +	b.ne	1f
> +	mrs	x0, vbar_el2
>  	b	2f
> -1:	mrs	x0, vbar_el2			// Return vbar_el2
> +
> +1:	cmp	x18, #HVC_SET_VECTORS
> +	b.ne	2f
> +	msr	vbar_el2, x0
> +
>  2:	eret
>  ENDPROC(el1_sync)
>  
> @@ -100,11 +108,12 @@ ENDPROC(\label)
>   * initialisation entry point.
>   */
>  
> -ENTRY(__hyp_get_vectors)
> -	mov	x0, xzr
> -	// fall through
>  ENTRY(__hyp_set_vectors)
> -	hvc	#0
> +	hvc	#HVC_SET_VECTORS
>  	ret
> -ENDPROC(__hyp_get_vectors)
>  ENDPROC(__hyp_set_vectors)
> +
> +ENTRY(__hyp_get_vectors)
> +	hvc	#HVC_GET_VECTORS
> +	ret
> +ENDPROC(__hyp_get_vectors)
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index b72aa9f..9607f15 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -26,6 +26,7 @@
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/virt.h>

As far as I can see, virt.h only defines BOOT_CPU_MODE_EL{1,2}, which we
don't use below. So I don't think we need this include.

Otherwise this looks fine to me.

Mark.

>  
>  #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
>  #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
> @@ -1105,12 +1106,9 @@ __hyp_panic_str:
>   * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
>   * passed in r0 and r1.
>   *
> - * A function pointer with a value of 0 has a special meaning, and is
> - * used to implement __hyp_get_vectors in the same way as in
> - * arch/arm64/kernel/hyp_stub.S.
>   */
>  ENTRY(kvm_call_hyp)
> -	hvc	#0
> +	hvc	#HVC_CALL_HYP
>  	ret
>  ENDPROC(kvm_call_hyp)
>  
> @@ -1140,6 +1138,7 @@ el1_sync:					// Guest trapped into EL2
>  	push	x2, x3
>  
>  	mrs	x1, esr_el2
> +	and	x0, x1, #ESR_EL2_ISS
>  	lsr	x2, x1, #ESR_EL2_EC_SHIFT
>  
>  	cmp	x2, #ESR_EL2_EC_HVC64
> @@ -1149,15 +1148,19 @@ el1_sync:					// Guest trapped into EL2
>  	cbnz	x3, el1_trap			// called HVC
>  
>  	/* Here, we're pretty sure the host called HVC. */
> +	mov	x18, x0
>  	pop	x2, x3
>  	pop	x0, x1
>  
> -	/* Check for __hyp_get_vectors */
> -	cbnz	x0, 1f
> +	cmp	x18, #HVC_GET_VECTORS
> +	b.ne	1f
>  	mrs	x0, vbar_el2
>  	b	2f
>  
> -1:	push	lr, xzr
> +1:	cmp	x18, #HVC_CALL_HYP
> +	b.ne	2f
> +
> +	push	lr, xzr
>  
>  	/*
>  	 * Compute the function address in EL2, and shuffle the parameters.
> @@ -1170,6 +1173,7 @@ el1_sync:					// Guest trapped into EL2
>  	blr	lr
>  
>  	pop	lr, xzr
> +
>  2:	eret
>  
>  el1_trap:
> -- 
> 1.9.1
> 
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-01 14:56       ` Vivek Goyal
@ 2014-10-03 18:35         ` Geoff Levand
  2014-10-07 13:44           ` Vivek Goyal
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-10-03 18:35 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Wed, 2014-10-01 at 10:56 -0400, Vivek Goyal wrote:
> On Tue, Sep 30, 2014 at 12:54:37PM -0700, Geoff Levand wrote:
> 
> [..]
> > > > +{
> > > > +	switch (flag) {
> > > > +	case IND_INDIRECTION:
> > > > +	case IND_SOURCE:
> > > > +		__flush_dcache_area(addr, PAGE_SIZE);
> > > > +		break;
> > > 
> > > So what does __flush_dcache_area() do? Flush data caches. IIUC, addr
> > > is virtual address at this point of time. While copying pages and
> > > walking through the list, I am assuming you have switched off page
> > > tables and you are in some kind of 1:1 physical mode. So how did
> > > flushing data caches related to a virtual address help. I guess we
> > > are not even accessing that virtual address now.
> > 
> > __flush_dcache_area(), and the underling aarch64 civac instruction
> > operate on virtual addresses.  Here we are still running with the
> > MMU on and the identity mapping has not yet been enabled.  This is
> > the sequence:
> > 
> >   flush dcache -> turn off MMU, dcache -> access memory (PoC) directly 
> 
> Sorry, I don't understand that why do we need to flush dcache for source
> and indirection page addresses. Some information here will help.

I think Mark answered this.  The architecture requires us to flush to
the point of coherency (PoC) anything that will be used after the
dcache is disabled.

For more info you can look at the section'D4.4.7 Cache maintenance
operations' in the ARMv8 Reference Manual you can get from here (after
registering):

  http://infocenter.arm.com/help/topic/com.arm.doc.subset.architecture.reference/index.html

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-01 15:19 ` Vivek Goyal
@ 2014-10-03 21:16   ` Geoff Levand
  2014-10-07 13:40     ` Vivek Goyal
  2014-10-07 15:22     ` Mark Rutland
  0 siblings, 2 replies; 65+ messages in thread
From: Geoff Levand @ 2014-10-03 21:16 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > Hi All,
> > 
> > This series adds the core support for kexec re-boots on arm64.  I have tested
> > with the ARM VE fast model using various kernel config options for both the
> > first and second stage kernels.
> 
> Does this patch series work with kexec on UEFI machines?

I have not done much more than some simple re-boot tests using the
base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
but yes, it should work.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 2/7] arm64: Convert hcalls to use ISS field
  2014-10-03 10:51   ` Mark Rutland
@ 2014-10-03 21:56     ` Geoff Levand
  2014-10-06 10:13       ` Mark Rutland
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-10-03 21:56 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Fri, 2014-10-03 at 11:51 +0100, Mark Rutland wrote:
> On Thu, Sep 25, 2014 at 01:23:26AM +0100, Geoff Levand wrote:

>  index 7a5df52..99c319c 100644
> > --- a/arch/arm64/include/asm/virt.h
> > +++ b/arch/arm64/include/asm/virt.h
> > @@ -21,6 +21,26 @@
> >  #define BOOT_CPU_MODE_EL1	(0xe11)
> >  #define BOOT_CPU_MODE_EL2	(0xe12)
> >  
> > +/*
> > + * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
> > + */
> > +
> > +#define HVC_GET_VECTORS 1
> > +
> > +/*
> > + * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
> > + *
> > + * @x0: Physical address of the new vector table.
> > + */
> > +
> > +#define HVC_SET_VECTORS 2
> > +
> > +/*
> > + * HVC_CALL_HYP - Execute a hyp routine.
> > + */
> > +
> > +#define HVC_CALL_HYP 3
> > +
> >  #ifndef __ASSEMBLY__
> >  
> >  /*
> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > index a272f33..8b39522 100644
> > --- a/arch/arm64/kernel/hyp-stub.S
> > +++ b/arch/arm64/kernel/hyp-stub.S
> > @@ -53,14 +53,22 @@ ENDPROC(__hyp_stub_vectors)
> >  	.align 11
> >  
> >  el1_sync:
> > -	mrs	x1, esr_el2
> > -	lsr	x1, x1, #26
> > -	cmp	x1, #0x16
> > -	b.ne	2f				// Not an HVC trap
> > -	cbz	x0, 1f
> > -	msr	vbar_el2, x0			// Set vbar_el2
> > +	mrs	x18, esr_el2
> > +	lsr	x17, x18, #26			// x17=EC
> 
> The first patch enabled kvm_arm.h macros to be used from asm. Can't we
> use them here? If we don't include kvm_arm.h in the hyp stub, maybe we
> should (ideally we'd factor common stuff out, but I can see that getting
> messy fast).

I didn't want to tackle the cleanup of the headers in this patch
series, and I thought it odd to include a kvm header in this core
arm64 file, but maybe it is better to include kvm_arm.h and use the
ESR_EL2_ macros so the values are at least easier to understand and
keep track of.

> Assuming we can use them:
> 
> 	lsr	x17, x18, #ESR_EL2_EC_SHIFT
> 
> > +	and	x18, x18, #0xffffff		// x18=ISS
> 
> 	and	x18, x18, ESR_EL2_ISS
> 
> > +
> > +	cmp     x17, #0x16
> 
> 	cmp 	x17, #ESR_EL2_EC_HVC64
> 
> > +	b.ne    2f				// Not an HVC trap
> > +
> > +	cmp	x18, #HVC_GET_VECTORS
> > +	b.ne	1f
> > +	mrs	x0, vbar_el2
> >  	b	2f
> > -1:	mrs	x0, vbar_el2			// Return vbar_el2
> > +
> > +1:	cmp	x18, #HVC_SET_VECTORS
> > +	b.ne	2f
> > +	msr	vbar_el2, x0
> > +
> >  2:	eret
> >  ENDPROC(el1_sync)
> >  
> > @@ -100,11 +108,12 @@ ENDPROC(\label)
> >   * initialisation entry point.
> >   */
> >  
> > -ENTRY(__hyp_get_vectors)
> > -	mov	x0, xzr
> > -	// fall through
> >  ENTRY(__hyp_set_vectors)
> > -	hvc	#0
> > +	hvc	#HVC_SET_VECTORS
> >  	ret
> > -ENDPROC(__hyp_get_vectors)
> >  ENDPROC(__hyp_set_vectors)
> > +
> > +ENTRY(__hyp_get_vectors)
> > +	hvc	#HVC_GET_VECTORS
> > +	ret
> > +ENDPROC(__hyp_get_vectors)
> > diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> > index b72aa9f..9607f15 100644
> > --- a/arch/arm64/kvm/hyp.S
> > +++ b/arch/arm64/kvm/hyp.S
> > @@ -26,6 +26,7 @@
> >  #include <asm/kvm_asm.h>
> >  #include <asm/kvm_arm.h>
> >  #include <asm/kvm_mmu.h>
> > +#include <asm/virt.h>
> 
> As far as I can see, virt.h only defines BOOT_CPU_MODE_EL{1,2}, which we
> don't use below. So I don't think we need this include.

We need asm/virt.h for the new HVC_ macros added to it by this
patch (above)...

> Otherwise this looks fine to me.
> 
> Mark.
> 
> >  
> >  #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
> >  #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
> > @@ -1105,12 +1106,9 @@ __hyp_panic_str:
> >   * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
> >   * passed in r0 and r1.
> >   *
> > - * A function pointer with a value of 0 has a special meaning, and is
> > - * used to implement __hyp_get_vectors in the same way as in
> > - * arch/arm64/kernel/hyp_stub.S.
> >   */
> >  ENTRY(kvm_call_hyp)
> > -	hvc	#0
> > +	hvc	#HVC_CALL_HYP
> >  	ret
> >  ENDPROC(kvm_call_hyp)

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros
  2014-10-03 10:26   ` Mark Rutland
@ 2014-10-03 22:27     ` Geoff Levand
  2014-10-06 10:10       ` Mark Rutland
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-10-03 22:27 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2014-10-03 at 11:26 +0100, Mark Rutland wrote:
> Would you be able to fix those up at the same time? It doesn't look like there
> are too many, and it should be a fairly mechanical change:

Sure.

> [mark at leverpostej:~/src/linux]% grep '[0-9][UL]' -- arch/arm64/include/asm/kvm_arm.h    
> #define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
> #define VTTBR_VMID_SHIFT  (48LLU)
> #define ESR_EL2_IL              (1U << 25)
> #define ESR_EL2_ISV             (1U << ESR_EL2_ISV_SHIFT)
> #define ESR_EL2_SAS             (3U << ESR_EL2_SAS_SHIFT)
> #define ESR_EL2_CV              (1U << ESR_EL2_CV_SHIFT)

The list is a bit longer if you look for hex numbers also, but I think
I got them all.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros
  2014-10-03 22:27     ` Geoff Levand
@ 2014-10-06 10:10       ` Mark Rutland
  0 siblings, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-06 10:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 03, 2014 at 11:27:48PM +0100, Geoff Levand wrote:
> On Fri, 2014-10-03 at 11:26 +0100, Mark Rutland wrote:
> > Would you be able to fix those up at the same time? It doesn't look like there
> > are too many, and it should be a fairly mechanical change:
> 
> Sure.
> 
> > [mark at leverpostej:~/src/linux]% grep '[0-9][UL]' -- arch/arm64/include/asm/kvm_arm.h    
> > #define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
> > #define VTTBR_VMID_SHIFT  (48LLU)
> > #define ESR_EL2_IL              (1U << 25)
> > #define ESR_EL2_ISV             (1U << ESR_EL2_ISV_SHIFT)
> > #define ESR_EL2_SAS             (3U << ESR_EL2_SAS_SHIFT)
> > #define ESR_EL2_CV              (1U << ESR_EL2_CV_SHIFT)
> 
> The list is a bit longer if you look for hex numbers also, but I think
> I got them all.

Ah, yes.

Cheers for fixing the rest!

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 2/7] arm64: Convert hcalls to use ISS field
  2014-10-03 21:56     ` Geoff Levand
@ 2014-10-06 10:13       ` Mark Rutland
  0 siblings, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-06 10:13 UTC (permalink / raw)
  To: linux-arm-kernel

[...]

> > >  el1_sync:
> > > -	mrs	x1, esr_el2
> > > -	lsr	x1, x1, #26
> > > -	cmp	x1, #0x16
> > > -	b.ne	2f				// Not an HVC trap
> > > -	cbz	x0, 1f
> > > -	msr	vbar_el2, x0			// Set vbar_el2
> > > +	mrs	x18, esr_el2
> > > +	lsr	x17, x18, #26			// x17=EC
> > 
> > The first patch enabled kvm_arm.h macros to be used from asm. Can't we
> > use them here? If we don't include kvm_arm.h in the hyp stub, maybe we
> > should (ideally we'd factor common stuff out, but I can see that getting
> > messy fast).
> 
> I didn't want to tackle the cleanup of the headers in this patch
> series, and I thought it odd to include a kvm header in this core
> arm64 file, but maybe it is better to include kvm_arm.h and use the
> ESR_EL2_ macros so the values are at least easier to understand and
> keep track of.

I'd rather we used the mnemonics. If that involves pulling in the kvm
header I'd rather we do that for the moment.

[...]

> > > diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> > > index b72aa9f..9607f15 100644
> > > --- a/arch/arm64/kvm/hyp.S
> > > +++ b/arch/arm64/kvm/hyp.S
> > > @@ -26,6 +26,7 @@
> > >  #include <asm/kvm_asm.h>
> > >  #include <asm/kvm_arm.h>
> > >  #include <asm/kvm_mmu.h>
> > > +#include <asm/virt.h>
> > 
> > As far as I can see, virt.h only defines BOOT_CPU_MODE_EL{1,2}, which we
> > don't use below. So I don't think we need this include.
> 
> We need asm/virt.h for the new HVC_ macros added to it by this
> patch (above)...

Sorry, my comment was bogus. I'd evidently confused myself when reading
this.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-03 21:16   ` Geoff Levand
@ 2014-10-07 13:40     ` Vivek Goyal
  2014-10-07 18:29       ` Geoff Levand
  2014-10-08  9:42       ` Mark Rutland
  2014-10-07 15:22     ` Mark Rutland
  1 sibling, 2 replies; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 13:40 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
> Hi,
> 
> On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > > Hi All,
> > > 
> > > This series adds the core support for kexec re-boots on arm64.  I have tested
> > > with the ARM VE fast model using various kernel config options for both the
> > > first and second stage kernels.
> > 
> > Does this patch series work with kexec on UEFI machines?
> 
> I have not done much more than some simple re-boot tests using the
> base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> but yes, it should work.

[CC Dave Young ]

I have not looked at the user space patches but are you preparing efi
setup data and passing all the runtime map information to second kernel.

I am hoping on arm64 we have the CONFIG_EFI_RUNTIME_MAP=y and it works
well.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-02 22:59       ` Geoff Levand
@ 2014-10-07 13:43         ` Vivek Goyal
  2014-10-07 14:06           ` Mark Rutland
  0 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 13:43 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Oct 02, 2014 at 03:59:55PM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Thu, 2014-10-02 at 15:08 -0400, Vivek Goyal wrote:
> > On Tue, Sep 30, 2014 at 02:27:56PM -0700, Geoff Levand wrote:
> > > For a running system you can check the device tree:
> > > 
> > >   cat /proc/device-tree/cpus/cpu\@0/enable-method | hexdump -C
> > > 
> > 
> > So system I have supports spin-table method for cpu bringup. How do I 
> > test your patches with that system. Are there any patches on your
> > spin-table branch which can make it working?
> 
> If possible, check if there is a firmware update that supports PSCI.
> 
> My spin-table patches are now out of date, and fixing those up is
> now low priority.

So psci method for cpu bring up is more popular as compared to 
spin-table one? 

> 
> I modified kexec-tools to only issue a message, but accept a device
> tree that does not have the new cpu-return-addr property that is
> needed for kexec on spin-table systems.  Since the spin-table stuff
> is only for managing secondary CPUs, this change should allow you to
> test kexec with a 1st stage kernel built with CONFIG_SMP=n.
> 
> Since the secondary CPUs will have never left the spin-table, you
> should be able to kexec re-boot into an SMP kernel, but you will
> not be able to do a successful kexec re-boot from there.

Ok, I can compile kernel with CONFIG_SMP=y but use maxcpus=1 for first
kernel and hopefully that works.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-03 18:35         ` Geoff Levand
@ 2014-10-07 13:44           ` Vivek Goyal
  2014-10-07 18:42             ` Geoff Levand
  0 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 13:44 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 03, 2014 at 11:35:03AM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Wed, 2014-10-01 at 10:56 -0400, Vivek Goyal wrote:
> > On Tue, Sep 30, 2014 at 12:54:37PM -0700, Geoff Levand wrote:
> > 
> > [..]
> > > > > +{
> > > > > +	switch (flag) {
> > > > > +	case IND_INDIRECTION:
> > > > > +	case IND_SOURCE:
> > > > > +		__flush_dcache_area(addr, PAGE_SIZE);
> > > > > +		break;
> > > > 
> > > > So what does __flush_dcache_area() do? Flush data caches. IIUC, addr
> > > > is virtual address at this point of time. While copying pages and
> > > > walking through the list, I am assuming you have switched off page
> > > > tables and you are in some kind of 1:1 physical mode. So how did
> > > > flushing data caches related to a virtual address help. I guess we
> > > > are not even accessing that virtual address now.
> > > 
> > > __flush_dcache_area(), and the underlying aarch64 civac instruction
> > > operate on virtual addresses.  Here we are still running with the
> > > MMU on and the identity mapping has not yet been enabled.  This is
> > > the sequence:
> > > 
> > >   flush dcache -> turn off MMU, dcache -> access memory (PoC) directly 
> > 
> > Sorry, I don't understand that why do we need to flush dcache for source
> > and indirection page addresses. Some information here will help.
> 
> I think Mark answered this.  The architecture requires us to flush to
> the point of coherency (PoC) anything that will be used after the
> dcache is disabled.
> 
> For more info you can look at the section'D4.4.7 Cache maintenance
> operations' in the ARMv8 Reference Manual you can get from here (after
> registering):
> 
>   http://infocenter.arm.com/help/topic/com.arm.doc.subset.architecture.reference/index.html

Geoff,

So as Mark and I discussed need of purgatory code in other mails, are you
plannign to enable purgatory on arm64.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-07 13:43         ` Vivek Goyal
@ 2014-10-07 14:06           ` Mark Rutland
  0 siblings, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-07 14:06 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 07, 2014 at 02:43:20PM +0100, Vivek Goyal wrote:
> On Thu, Oct 02, 2014 at 03:59:55PM -0700, Geoff Levand wrote:
> > Hi Vivek,
> > 
> > On Thu, 2014-10-02 at 15:08 -0400, Vivek Goyal wrote:
> > > On Tue, Sep 30, 2014 at 02:27:56PM -0700, Geoff Levand wrote:
> > > > For a running system you can check the device tree:
> > > > 
> > > >   cat /proc/device-tree/cpus/cpu\@0/enable-method | hexdump -C
> > > > 
> > > 
> > > So system I have supports spin-table method for cpu bringup. How do I 
> > > test your patches with that system. Are there any patches on your
> > > spin-table branch which can make it working?
> > 
> > If possible, check if there is a firmware update that supports PSCI.
> > 
> > My spin-table patches are now out of date, and fixing those up is
> > now low priority.
> 
> So psci method for cpu bring up is more popular as compared to 
> spin-table one? 

PSCI is the preferred method, and it is what we expect to see in server
machines featuring EL3. There are implementation in KVM and Xen, and on
physical machines (Juno so ar) with the ARM Trusted Firmware. The Linux
bootwrapper features a trivial (pre-v1) implementation, and there has
been work to implement PSCI in U-Boot.

The spin-table mechanism has no provision for returning CPUs to the
firmware, so it's not possible to hotplug CPUs. Vendors are using it during
bringup so firmware and kernel can be developed in parallel, but I would
expect that for enterprise products PSCI should be available.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-03 21:16   ` Geoff Levand
  2014-10-07 13:40     ` Vivek Goyal
@ 2014-10-07 15:22     ` Mark Rutland
  2014-10-07 18:28       ` Geoff Levand
  1 sibling, 1 reply; 65+ messages in thread
From: Mark Rutland @ 2014-10-07 15:22 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

> > Does this patch series work with kexec on UEFI machines?
> 
> I have not done much more than some simple re-boot tests using the
> base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> but yes, it should work.

Were you booting the kernel as a UEFI application, or were you using the
Linux loader? The latter arguably isn't a UEFI boot because the kernel doesn't
know of or have access to UEFI (and arguably said loader shouldn't even be
there).

I've just had a go on Juno, but I wasn't able to get the tool to work.
It seems that it's unhappy with the DTB because the EFI stub deleted the
memory nodes (to enforce use of the UEFI memory map). I'm not sure what
the tool needs to parse the memory map for, so I don't know what the
best way around that is.

nanook at localhost:~/opt/sbin$ ./kexec /home/nanook/vmlinux 
kexec version: 14.10.02.15.58-g706fbeb
Modified cmdline: root=/dev/nfs 
Unable to find /proc/device-tree//chosen/linux,stdout-path, printing from purgatory is diabled
get_memory_ranges_dt:756: Invalid /proc/device-tree.
kexec: kexec/arch/arm64/kexec-arm64.c:625: virt_to_phys: Assertion `arm64_mem.memstart' failed.
Aborted

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (8 preceding siblings ...)
  2014-10-01 15:19 ` Vivek Goyal
@ 2014-10-07 18:09 ` Vivek Goyal
  2014-10-07 20:07   ` Geoff Levand
  2014-10-09  9:22 ` Will Deacon
  10 siblings, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> Hi All,
> 
> This series adds the core support for kexec re-boots on arm64.  I have tested
> with the ARM VE fast model using various kernel config options for both the
> first and second stage kernels.
> 
> To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
> kexec-tools [2], which have not yet been merged upstream, are needed.
> 
> This series does not include some re-work of the spin-table CPU enable method
> that is needed to support it, nor does it include some re-work of KVM to support
> CPU soft reset.  A kernel built with these patches will boot and run correctly,
> but will fail to load a kexec kernel if running on a machine with any spin-table
> enabled CPUs and will fail the kexec re-boot if the first stage kernel was built
> with CONFIG_KVM=y.  Work-in-progress patches to support these are in the master
> branch of my linux-kexec repository [1].
> 
> Patches 1-4 rework the arm64 hcall mechanism to give the arm64 soft_restart()
> routine the ability to switch exception levels from EL1 to EL2 for kernels that
> were entered in EL2.
> 
> Patch 5 moves proc-macros.S from arm64/mm to arm64/include/asm so that the
> dcache_line_size macro it defines can be uesd by kexec's relocate kernel
> routine.
> 
> Patches 11 and 12 add the actual kexec support.
> 
> Please consider all patches for inclusion.  Any comments or suggestions on how 
> to improve are welcome.
> 
> [1]  https://git.linaro.org/people/geoff.levand/linux-kexec.git
> [2]  https://git.linaro.org/people/geoff.levand/kexec-tools.git
> 

Hi Geoff,

I tried and I get following.

[root at apm-mustang-ev3-06 sbin]# ./kexec -l /root/git/sa2/vmlinux
--initrd=/boot/initramfs-3.17.0-rc7+.img
--append="BOOT_IMAGE=/vmlinuz-3.17.0-rc5+
root=/dev/mapper/fedora_dhcp--17--165-root ro
rd.lvm.lv=fedora_dhcp-17-165/root rd.lvm.lv=fedora_dhcp-17-165/swap
vconsole.font=latarcyrheb-sun16 console=tty0 console=ttyS0,115200
LANG=en_US.UTF-8 crashkernel=160M earlyprintk=ttyS0,115200"
kexec version: 14.10.02.15.58-g706fbeb
Modified cmdline: root=/dev/mapper/rhsfadp_apm--mustang--ev3--06-root 
Unable to find /proc/device-tree//chosen/linux,stdout-path, printing from
purgatory is diabled
get_memory_ranges_dt:756: Invalid /proc/device-tree.
kexec: kexec/arch/arm64/kexec-arm64.c:625: virt_to_phys: Assertion
`arm64_mem.memstart' failed.
Aborted


What's "Modified cmdline" ? That seems to have truncated everything I 
passed in.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-07 15:22     ` Mark Rutland
@ 2014-10-07 18:28       ` Geoff Levand
  0 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-10-07 18:28 UTC (permalink / raw)
  To: linux-arm-kernel


On Tue, 2014-10-07 at 16:22 +0100, Mark Rutland wrote:
> Hi Geoff,
> 
> > > Does this patch series work with kexec on UEFI machines?
> > 
> > I have not done much more than some simple re-boot tests using the
> > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> > but yes, it should work.
> 
> Were you booting the kernel as a UEFI application, or were you using the
> Linux loader? The latter arguably isn't a UEFI boot because the kernel doesn't
> know of or have access to UEFI (and arguably said loader shouldn't even be
> there).

I used the Linux loader. 

> I've just had a go on Juno, but I wasn't able to get the tool to work.

No, I wouldn't think it would.  Proper UEFI re-boot is
still todo.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-07 13:40     ` Vivek Goyal
@ 2014-10-07 18:29       ` Geoff Levand
  2014-10-08  9:42       ` Mark Rutland
  1 sibling, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-10-07 18:29 UTC (permalink / raw)
  To: linux-arm-kernel


On Tue, 2014-10-07 at 09:40 -0400, Vivek Goyal wrote:
> On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
> > Hi,
> > 
> > On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> > > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > > > Hi All,
> > > > 
> > > > This series adds the core support for kexec re-boots on arm64.  I have tested
> > > > with the ARM VE fast model using various kernel config options for both the
> > > > first and second stage kernels.
> > > 
> > > Does this patch series work with kexec on UEFI machines?
> > 
> > I have not done much more than some simple re-boot tests using the
> > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> > but yes, it should work.
> 
> [CC Dave Young ]
> 
> I have not looked at the user space patches but are you preparing efi
> setup data and passing all the runtime map information to second kernel.

No, I have not done that yet.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 13:44           ` Vivek Goyal
@ 2014-10-07 18:42             ` Geoff Levand
  2014-10-07 18:45               ` Vivek Goyal
  2014-10-07 18:48               ` Vivek Goyal
  0 siblings, 2 replies; 65+ messages in thread
From: Geoff Levand @ 2014-10-07 18:42 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Tue, 2014-10-07 at 09:44 -0400, Vivek Goyal wrote:
> So as Mark and I discussed need of purgatory code in other mails, are you
> plannign to enable purgatory on arm64.

Adding purgatory code to arm64 is low priority, and I currently
have no plan to do that.  Users are asking for kdump, and proper
UEFI support, so that is what I will work towards.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 18:42             ` Geoff Levand
@ 2014-10-07 18:45               ` Vivek Goyal
  2014-10-07 20:12                 ` Geoff Levand
  2014-10-07 18:48               ` Vivek Goyal
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 18:45 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 07, 2014 at 11:42:00AM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Tue, 2014-10-07 at 09:44 -0400, Vivek Goyal wrote:
> > So as Mark and I discussed need of purgatory code in other mails, are you
> > plannign to enable purgatory on arm64.
> 
> Adding purgatory code to arm64 is low priority, and I currently
> have no plan to do that.  Users are asking for kdump, and proper
> UEFI support, so that is what I will work towards.

I think having purgatory enabled is very important here as in kernel
you are hardcoding that one of the segments is DTB and doing all the
magic tricks w.r.t putting a magic number. I think as an interface
that seems bad. So I think atleast we will to fix the purgatory part
of it.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 18:42             ` Geoff Levand
  2014-10-07 18:45               ` Vivek Goyal
@ 2014-10-07 18:48               ` Vivek Goyal
  1 sibling, 0 replies; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 18:48 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 07, 2014 at 11:42:00AM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Tue, 2014-10-07 at 09:44 -0400, Vivek Goyal wrote:
> > So as Mark and I discussed need of purgatory code in other mails, are you
> > plannign to enable purgatory on arm64.
> 
> Adding purgatory code to arm64 is low priority, and I currently
> have no plan to do that.  Users are asking for kdump, and proper
> UEFI support, so that is what I will work towards.

kdump will make use of purgatory too. And there does not seem to be
any logic which verifies the checksums of loaded segments hence reducing
the reliability of kdump operation.

So I think fixing the purgatory part here is a must. It leads to better
design as well as improved reliability for kexec/kdump operations.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-07 18:09 ` Vivek Goyal
@ 2014-10-07 20:07   ` Geoff Levand
  2014-10-08  9:52     ` Leif Lindholm
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-10-07 20:07 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Tue, 2014-10-07 at 14:09 -0400, Vivek Goyal wrote:
> I tried and I get following.
> 
> [root at apm-mustang-ev3-06 sbin]# ./kexec -l /root/git/sa2/vmlinux
> --initrd=/boot/initramfs-3.17.0-rc7+.img
> --append="BOOT_IMAGE=/vmlinuz-3.17.0-rc5+
> root=/dev/mapper/fedora_dhcp--17--165-root ro
> rd.lvm.lv=fedora_dhcp-17-165/root rd.lvm.lv=fedora_dhcp-17-165/swap
> vconsole.font=latarcyrheb-sun16 console=tty0 console=ttyS0,115200
> LANG=en_US.UTF-8 crashkernel=160M earlyprintk=ttyS0,115200"
> kexec version: 14.10.02.15.58-g706fbeb
> Modified cmdline: root=/dev/mapper/rhsfadp_apm--mustang--ev3--06-root 
> Unable to find /proc/device-tree//chosen/linux,stdout-path, printing from
> purgatory is diabled
> get_memory_ranges_dt:756: Invalid /proc/device-tree.
> kexec: kexec/arch/arm64/kexec-arm64.c:625: virt_to_phys: Assertion
> `arm64_mem.memstart' failed.
> Aborted

I added a kexec-tools fix for this failed assertion.  Could you pull
my latest and send me the output with the kexec --debug option added?

I guess you are trying to boot the kernel as a UEFI application,
which I have not yet done, so not supported.  You need to use a
UEFI kernel loader.

> What's "Modified cmdline" ? That seems to have truncated everything I 
> passed in.

The kexec-tools fs2dt code prints out the Modified cmdline when a node
is added to the DT.  In this case it is OK, since the arm64 code first
adds other nodes.  Your new cmdline is added later, but you did not
get there do to the failed assertion.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 18:45               ` Vivek Goyal
@ 2014-10-07 20:12                 ` Geoff Levand
  2014-10-07 20:22                   ` Vivek Goyal
  2014-10-08  9:28                   ` Mark Rutland
  0 siblings, 2 replies; 65+ messages in thread
From: Geoff Levand @ 2014-10-07 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Tue, 2014-10-07 at 14:45 -0400, Vivek Goyal wrote:
> On Tue, Oct 07, 2014 at 11:42:00AM -0700, Geoff Levand wrote:
> > Adding purgatory code to arm64 is low priority, and I currently
> > have no plan to do that.  Users are asking for kdump, and proper
> > UEFI support, so that is what I will work towards.
> 
> I think having purgatory enabled is very important here as in kernel
> you are hardcoding that one of the segments is DTB and doing all the
> magic tricks w.r.t putting a magic number. 

I don't argue that having purgatory code could be useful, but as of
now, enabling the other features is what I'll work towards.
 
Regarding the device tree magic number, I'm wondering if you missed
that the device tree has a header, and that header has a magic
number.  See here:

  http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/scripts/dtc/libfdt/fdt.h#n6

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 20:12                 ` Geoff Levand
@ 2014-10-07 20:22                   ` Vivek Goyal
  2014-10-09 22:26                     ` Geoff Levand
  2014-10-08  9:28                   ` Mark Rutland
  1 sibling, 1 reply; 65+ messages in thread
From: Vivek Goyal @ 2014-10-07 20:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 07, 2014 at 01:12:57PM -0700, Geoff Levand wrote:
> Hi Vivek,
> 
> On Tue, 2014-10-07 at 14:45 -0400, Vivek Goyal wrote:
> > On Tue, Oct 07, 2014 at 11:42:00AM -0700, Geoff Levand wrote:
> > > Adding purgatory code to arm64 is low priority, and I currently
> > > have no plan to do that.  Users are asking for kdump, and proper
> > > UEFI support, so that is what I will work towards.
> > 
> > I think having purgatory enabled is very important here as in kernel
> > you are hardcoding that one of the segments is DTB and doing all the
> > magic tricks w.r.t putting a magic number. 
> 
> I don't argue that having purgatory code could be useful, but as of
> now, enabling the other features is what I'll work towards.
>  
> Regarding the device tree magic number, I'm wondering if you missed
> that the device tree has a header, and that header has a magic
> number.  See here:
> 
>   http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/scripts/dtc/libfdt/fdt.h#n6
> 

Problem is this that if you put code in kernel once which does something
which purgatory ought to do, you will never be able to remove it for
backward compatibility reasons. Older versions of kexec-tools will 
continue to rely on it. Also how in kernel you would know that now
purgatory will take care of this and kernel does not have to worry
about something. So it is a good idea to integrate the purgatory support
from the very beginning.

Also, verifying checksums of loaded segments before jumping to that kernel
is a must from feature point of view.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 20:12                 ` Geoff Levand
  2014-10-07 20:22                   ` Vivek Goyal
@ 2014-10-08  9:28                   ` Mark Rutland
  1 sibling, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-08  9:28 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 07, 2014 at 09:12:57PM +0100, Geoff Levand wrote:
> Hi Vivek,
> 
> On Tue, 2014-10-07 at 14:45 -0400, Vivek Goyal wrote:
> > On Tue, Oct 07, 2014 at 11:42:00AM -0700, Geoff Levand wrote:
> > > Adding purgatory code to arm64 is low priority, and I currently
> > > have no plan to do that.  Users are asking for kdump, and proper
> > > UEFI support, so that is what I will work towards.
> > 
> > I think having purgatory enabled is very important here as in kernel
> > you are hardcoding that one of the segments is DTB and doing all the
> > magic tricks w.r.t putting a magic number. 
> 
> I don't argue that having purgatory code could be useful, but as of
> now, enabling the other features is what I'll work towards.
>  
> Regarding the device tree magic number, I'm wondering if you missed
> that the device tree has a header, and that header has a magic
> number.  See here:
> 
>   http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/scripts/dtc/libfdt/fdt.h#n6

As I mentioned elsewhere, regardless of whether we can detect if a
segment is a DTB, we're still encoding the policy of what to do with
that within the kernel, and there are cases that is problematic for.

Leaving at least the register setup to the caller's discretion (by way
of a userspace provided purgatory) avoids those problems entirely.

kexec_load_file is more complicated in that regard due to the less
general interface provided.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-07 13:40     ` Vivek Goyal
  2014-10-07 18:29       ` Geoff Levand
@ 2014-10-08  9:42       ` Mark Rutland
  2014-10-09  3:21         ` Dave Young
  1 sibling, 1 reply; 65+ messages in thread
From: Mark Rutland @ 2014-10-08  9:42 UTC (permalink / raw)
  To: linux-arm-kernel

[Cc'ing the arm efi guys]

On Tue, Oct 07, 2014 at 02:40:06PM +0100, Vivek Goyal wrote:
> On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
> > Hi,
> > 
> > On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> > > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > > > Hi All,
> > > > 
> > > > This series adds the core support for kexec re-boots on arm64.  I have tested
> > > > with the ARM VE fast model using various kernel config options for both the
> > > > first and second stage kernels.
> > > 
> > > Does this patch series work with kexec on UEFI machines?
> > 
> > I have not done much more than some simple re-boot tests using the
> > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> > but yes, it should work.
> 
> [CC Dave Young ]
> 
> I have not looked at the user space patches but are you preparing efi
> setup data and passing all the runtime map information to second kernel.
> 
> I am hoping on arm64 we have the CONFIG_EFI_RUNTIME_MAP=y and it works
> well.

From a quick look at mainline that config option just gates exposing
that info under sysfs, and we never call efi_runtime_map_setup on arm64.
I guess the x86 kexec tool parses that and sets up some data structure?

For arm64 we have runtime map information in the initial DTB, and this
should have been copied into the DTB we pass to the second kernel. See
Documentation/arm/uefi.txt and drivers/firmware/efi/libstub/fdt.c.

Is that sufficient?

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-07 20:07   ` Geoff Levand
@ 2014-10-08  9:52     ` Leif Lindholm
  2014-10-08 10:07       ` Mark Rutland
  0 siblings, 1 reply; 65+ messages in thread
From: Leif Lindholm @ 2014-10-08  9:52 UTC (permalink / raw)
  To: linux-arm-kernel

On 7 October 2014 21:07, Geoff Levand <geoff@infradead.org> wrote:
> You need to use a UEFI kernel loader.

No such thing exists.
The "built-in Linux loader" is a u-boot emulator (try it, it will work
with a uImage on v7) added for early development purposes and
unfortunately released in the wild.
It will very shortly disappear from Linaro UEFI builds.

/
    Leif

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-08  9:52     ` Leif Lindholm
@ 2014-10-08 10:07       ` Mark Rutland
  0 siblings, 0 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-08 10:07 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 08, 2014 at 10:52:09AM +0100, Leif Lindholm wrote:
> On 7 October 2014 21:07, Geoff Levand <geoff@infradead.org> wrote:
> > You need to use a UEFI kernel loader.
> 
> No such thing exists.
> The "built-in Linux loader" is a u-boot emulator (try it, it will work
> with a uImage on v7) added for early development purposes and
> unfortunately released in the wild.
> It will very shortly disappear from Linaro UEFI builds.

To further Leif's point, the "built-in Linux loader" is not in the UEFI
spec, and is therefore not a portion of UEFI. It is a crutch, and it
should not be relied upon as anything more than an aid during bringup.

Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-08  9:42       ` Mark Rutland
@ 2014-10-09  3:21         ` Dave Young
  2014-10-09 10:09           ` Mark Rutland
  0 siblings, 1 reply; 65+ messages in thread
From: Dave Young @ 2014-10-09  3:21 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/08/14 at 10:42am, Mark Rutland wrote:
> [Cc'ing the arm efi guys]
> 
> On Tue, Oct 07, 2014 at 02:40:06PM +0100, Vivek Goyal wrote:
> > On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
> > > Hi,
> > > 
> > > On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> > > > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > > > > Hi All,
> > > > > 
> > > > > This series adds the core support for kexec re-boots on arm64.  I have tested
> > > > > with the ARM VE fast model using various kernel config options for both the
> > > > > first and second stage kernels.
> > > > 
> > > > Does this patch series work with kexec on UEFI machines?
> > > 
> > > I have not done much more than some simple re-boot tests using the
> > > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> > > but yes, it should work.
> > 
> > [CC Dave Young ]
> > 
> > I have not looked at the user space patches but are you preparing efi
> > setup data and passing all the runtime map information to second kernel.
> > 
> > I am hoping on arm64 we have the CONFIG_EFI_RUNTIME_MAP=y and it works
> > well.
> 
> From a quick look at mainline that config option just gates exposing
> that info under sysfs, and we never call efi_runtime_map_setup on arm64.
> I gues the x86 kexec tool parses that and sets up some data structure?
> 
> For arm64 we have runtime map information in the initial DTB, and this
> should have been copied into the DTB we pass to the second kernel. See
> Documentation/arm/uefi.txt and drivers/firmware/efi/libstub/fdt.c.
> 
> Is that sufficient?

That will be sufficient for runtime maps, but several phys addresses need to be
saved: /sys/firmware/efi/{fw_vendor,runtime,config_table}

From the UEFI spec these addresses are updated to virtual addresses after entering
virtual mode.

Though it should work without the runtime map exporting to sysfs, it's not bad
to export them; it's good to have for debugging purposes and it will be consistent
across different arches.

Thanks
Dave

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
                   ` (9 preceding siblings ...)
  2014-10-07 18:09 ` Vivek Goyal
@ 2014-10-09  9:22 ` Will Deacon
  10 siblings, 0 replies; 65+ messages in thread
From: Will Deacon @ 2014-10-09  9:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Sep 25, 2014 at 01:23:26AM +0100, Geoff Levand wrote:
> Hi All,

Hi Geoff,

> This series adds the core support for kexec re-boots on arm64.  I have tested
> with the ARM VE fast model using various kernel config options for both the
> first and second stage kernels.
> 
> To load a second stage kernel and execute a kexec re-boot on arm64 my patches to
> kexec-tools [2], which have not yet been merged upstream, are needed.
> 
> This series does not include some re-work of the spin-table CPU enable method
> that is needed to support it, nor does it include some re-work of KVM to support
> CPU soft reset.  A kernel built with these patches will boot and run correctly,
> but will fail to load a kexec kernel if running on a machine with any spin-table
> enabled CPUs and will fail the kexec re-boot if the first stage kernel was built
> with CONFIG_KVM=y.  Work-in-progress patches to support these are in the master
> branch of my linux-kexec repository [1].

I've been following this thread on the periphery and I think Vivek makes
some good points wrt ABI and purgatory. I'd like to see his Ack on the
series before we merge it.

However, I don't think we need to wait for a spin-table implementation of
CPU hotplug -- that can be done as a separate, independent series by
somebody fooli^H^H^H^H^Hbrave enough to try it.

Will

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-09  3:21         ` Dave Young
@ 2014-10-09 10:09           ` Mark Rutland
  2014-10-09 10:19             ` Ard Biesheuvel
  2014-10-10  5:30             ` Dave Young
  0 siblings, 2 replies; 65+ messages in thread
From: Mark Rutland @ 2014-10-09 10:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Oct 09, 2014 at 04:21:30AM +0100, Dave Young wrote:
> On 10/08/14 at 10:42am, Mark Rutland wrote:
> > [Cc'ing the arm efi guys]
> > 
> > On Tue, Oct 07, 2014 at 02:40:06PM +0100, Vivek Goyal wrote:
> > > On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
> > > > Hi,
> > > > 
> > > > On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> > > > > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > > > > > Hi All,
> > > > > > 
> > > > > > This series adds the core support for kexec re-boots on arm64.  I have tested
> > > > > > with the ARM VE fast model using various kernel config options for both the
> > > > > > first and second stage kernels.
> > > > > 
> > > > > Does this patch series work with kexec on UEFI machines?
> > > > 
> > > > I have not done much more than some simple re-boot tests using the
> > > > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> > > > but yes, it should work.
> > > 
> > > [CC Dave Young ]
> > > 
> > > I have not looked at the user space patches but are you preparing efi
> > > setup data and passing all the runtime map information to second kernel.
> > > 
> > > I am hoping on arm64 we have the CONFIG_EFI_RUNTIME_MAP=y and it works
> > > well.
> > 
> > From a quick look at mainline that config option just gates exposing
> > that info under sysfs, and we never call efi_runtime_map_setup on arm64.
> > I gues the x86 kexec tool parses that and sets up some data structure?
> > 
> > For arm64 we have runtime map information in the initial DTB, and this
> > should have been copied into the DTB we pass to the second kernel. See
> > Documentation/arm/uefi.txt and drivers/firmware/efi/libstub/fdt.c.
> > 
> > Is that sufficient?
> 
> That will be sufficient for runtime maps, but several phys addresses need to be
> saved: /sys/firmware/efi/{fw_vendor,runtime,config_table}
> 
> From uefi spec these addresses are updated to virtual addresses after entering
> virtual mode.

Ouch. That sounds painful.

I'd prefer to not have to duplicate these in the DTB if we can. Given we
should have the memmap with the virtual<->physical mappings, could we
not do a lookup there before early dereferences if we happen to already
be in virtual mode?

> Though it should work without the runtime map exporting to sysfs, It's not bad
> to export them, it's good to have for debugging purpose and it will be consistant
> across different arches.

Exporting the sysfs entries makes sense for debug, but I'd very much
prefer that it were not our default solution for passing the maps to the
next kernel.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-09 10:09           ` Mark Rutland
@ 2014-10-09 10:19             ` Ard Biesheuvel
  2014-10-10  5:30             ` Dave Young
  1 sibling, 0 replies; 65+ messages in thread
From: Ard Biesheuvel @ 2014-10-09 10:19 UTC (permalink / raw)
  To: linux-arm-kernel

On 9 October 2014 12:09, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Oct 09, 2014 at 04:21:30AM +0100, Dave Young wrote:
>> On 10/08/14 at 10:42am, Mark Rutland wrote:
>> > [Cc'ing the arm efi guys]
>> >
>> > On Tue, Oct 07, 2014 at 02:40:06PM +0100, Vivek Goyal wrote:
>> > > On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
>> > > > Hi,
>> > > >
>> > > > On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
>> > > > > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
>> > > > > > Hi All,
>> > > > > >
>> > > > > > This series adds the core support for kexec re-boots on arm64.  I have tested
>> > > > > > with the ARM VE fast model using various kernel config options for both the
>> > > > > > first and second stage kernels.
>> > > > >
>> > > > > Does this patch series work with kexec on UEFI machines?
>> > > >
>> > > > I have not done much more than some simple re-boot tests using the
>> > > > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
>> > > > but yes, it should work.
>> > >
>> > > [CC Dave Young ]
>> > >
>> > > I have not looked at the user space patches but are you preparing efi
>> > > setup data and passing all the runtime map information to second kernel.
>> > >
>> > > I am hoping on arm64 we have the CONFIG_EFI_RUNTIME_MAP=y and it works
>> > > well.
>> >
>> > From a quick look at mainline that config option just gates exposing
>> > that info under sysfs, and we never call efi_runtime_map_setup on arm64.
>> > I gues the x86 kexec tool parses that and sets up some data structure?
>> >
>> > For arm64 we have runtime map information in the initial DTB, and this
>> > should have been copied into the DTB we pass to the second kernel. See
>> > Documentation/arm/uefi.txt and drivers/firmware/efi/libstub/fdt.c.
>> >
>> > Is that sufficient?
>>
>> That will be sufficient for runtime maps, but several phys addresses need to be
>> saved: /sys/firmware/efi/{fw_vendor,runtime,config_table}
>>
>> From uefi spec these addresses are updated to virtual addresses after entering
>> virtual mode.
>
> Ouch. That sounds painful.
>
> I'd prefer to not have to duplicate these in the DTB if we can. Given we
> should have the memmmap with the virtual<->physical mappings, could we
> not do a lookup there before early dereferences if we happen to already
> be in virtual mode?
>

According to the UEFI spec, only code and data regions used for UEFI
Runtime Services are supposed to be virtually remapped. The firmware
must not request virtual mappings for configuration tables that are
installed at boot time. (UEFI spec 2.3.6)

This means that the only spec compliant way to access this data is to
take its physical address (whose value remains accessible after
installing the virtual memory map) and ioremap() it. Anything else
violates the spec, even if it works currently on x86

>> Though it should work without the runtime map exporting to sysfs, It's not bad
>> to export them, it's good to have for debugging purpose and it will be consistant
>> across different arches.
>
> Exporting the sysfs entries makes sense for debug, but I'd very much
> prefer that it were not our default solution for passing the maps to the
> next kernel.
>

Agreed

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-07 20:22                   ` Vivek Goyal
@ 2014-10-09 22:26                     ` Geoff Levand
  2014-10-23 23:08                       ` Geoff Levand
  0 siblings, 1 reply; 65+ messages in thread
From: Geoff Levand @ 2014-10-09 22:26 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Tue, 2014-10-07 at 16:22 -0400, Vivek Goyal wrote:
> Problem is this that if you put code in kernel once which does something
> which purgatory ought to do, you will never be able to remove it for
> backward compatibility reasons. Older versions of kexec-tools will 
> continue to rely on it. Also how in kernel you would know that now
> purgatory will take care of this and kernel does not have to worry
> about something. So it is a good idea to integrate the purgatory support
> from the very beginning.

I agree with you.  I will add a purgatory.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 0/7] arm64 kexec kernel patches V3
  2014-10-09 10:09           ` Mark Rutland
  2014-10-09 10:19             ` Ard Biesheuvel
@ 2014-10-10  5:30             ` Dave Young
  1 sibling, 0 replies; 65+ messages in thread
From: Dave Young @ 2014-10-10  5:30 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/09/14 at 11:09am, Mark Rutland wrote:
> On Thu, Oct 09, 2014 at 04:21:30AM +0100, Dave Young wrote:
> > On 10/08/14 at 10:42am, Mark Rutland wrote:
> > > [Cc'ing the arm efi guys]
> > > 
> > > On Tue, Oct 07, 2014 at 02:40:06PM +0100, Vivek Goyal wrote:
> > > > On Fri, Oct 03, 2014 at 02:16:11PM -0700, Geoff Levand wrote:
> > > > > Hi,
> > > > > 
> > > > > On Wed, 2014-10-01 at 11:19 -0400, Vivek Goyal wrote:
> > > > > > On Thu, Sep 25, 2014 at 12:23:26AM +0000, Geoff Levand wrote:
> > > > > > > Hi All,
> > > > > > > 
> > > > > > > This series adds the core support for kexec re-boots on arm64.  I have tested
> > > > > > > with the ARM VE fast model using various kernel config options for both the
> > > > > > > first and second stage kernels.
> > > > > > 
> > > > > > Does this patch series work with kexec on UEFI machines?
> > > > > 
> > > > > I have not done much more than some simple re-boot tests using the
> > > > > base model (FVP_Base_AEMv8A-AEMv8A) and the Linaro 14.08 efi build,
> > > > > but yes, it should work.
> > > > 
> > > > [CC Dave Young ]
> > > > 
> > > > I have not looked at the user space patches but are you preparing efi
> > > > setup data and passing all the runtime map information to second kernel.
> > > > 
> > > > I am hoping on arm64 we have the CONFIG_EFI_RUNTIME_MAP=y and it works
> > > > well.
> > > 
> > > From a quick look at mainline that config option just gates exposing
> > > that info under sysfs, and we never call efi_runtime_map_setup on arm64.
> > > I gues the x86 kexec tool parses that and sets up some data structure?
> > > 
> > > For arm64 we have runtime map information in the initial DTB, and this
> > > should have been copied into the DTB we pass to the second kernel. See
> > > Documentation/arm/uefi.txt and drivers/firmware/efi/libstub/fdt.c.
> > > 
> > > Is that sufficient?
> > 
> > That will be sufficient for runtime maps, but several phys addresses need to be
> > saved: /sys/firmware/efi/{fw_vendor,runtime,config_table}
> > 
> > From uefi spec these addresses are updated to virtual addresses after entering
> > virtual mode.
> 
> Ouch. That sounds painful.
> 
> I'd prefer to not have to duplicate these in the DTB if we can. Given we
> should have the memmmap with the virtual<->physical mappings, could we
> not do a lookup there before early dereferences if we happen to already
> be in virtual mode?

I do not have an idea how to skip dereferencing these variables, see below usage: 

arch/arm64/kernel/efi.c, uefi_init():
        c16 = early_memremap(efi.systab->fw_vendor,
                             sizeof(vendor));
        if (c16) {
                for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
                        vendor[i] = c16[i];
                vendor[i] = '\0';
        }

drivers/firmware/efi/efi.c, efi_config_init():
        config_tables = early_memremap(efi.systab->tables,
                                       efi.systab->nr_tables * sz)

It's strange to me that in arm64 code systab->runtime is used directly without
ioremapping, but in x86 code, we do below:

arch/x86/platform/efi/efi.c, efi_runtime_init64():
        runtime = early_ioremap((unsigned long)efi.systab->runtime,
                        sizeof(efi_runtime_services_64_t));

for arm64 we have below:
        efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
        if (efi.systab)
                set_bit(EFI_SYSTEM_TABLES, &efi.flags);

        [snip]

        /* Call SetVirtualAddressMap with the physical address of the map */
        runtime = efi.systab->runtime;
        efi.set_virtual_address_map = runtime->set_virtual_address_map;

efi_lookup_mapped_addr is only valid after the set_virtual_address_map callback, so
it's questionable to use it above; the runtime should be a physical address and should
be ioremapped as in the x86 code.

> 
> > Though it should work without the runtime map exporting to sysfs, It's not bad
> > to export them, it's good to have for debugging purpose and it will be consistant
> > across different arches.
> 
> Exporting the sysfs entries makes sense for debug, but I'd very much
> prefer that it were not our default solution for passing the maps to the
> next kernel.

Agreed.

Thanks
Dave

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 6/7] arm64/kexec: Add core kexec support
  2014-10-09 22:26                     ` Geoff Levand
@ 2014-10-23 23:08                       ` Geoff Levand
  0 siblings, 0 replies; 65+ messages in thread
From: Geoff Levand @ 2014-10-23 23:08 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Vivek,

On Thu, 2014-10-09 at 15:26 -0700, Geoff Levand wrote:
> On Tue, 2014-10-07 at 16:22 -0400, Vivek Goyal wrote:
> > Problem is this that if you put code in kernel once which does something
> > which purgatory ought to do, you will never be able to remove it for
> > backward compatibility reasons. Older versions of kexec-tools will 
> > continue to rely on it. Also how in kernel you would know that now
> > purgatory will take care of this and kernel does not have to worry
> > about something. So it is a good idea to integrate the purgatory support
> > from the very beginning.
> 
> I agree with you.  I will add a purgatory.

I added a purgatory stage to kexec-tools, and verified it works
as expected.  My current kernel patches were generic enough to
support a purgatory stage without any additional changes.  I did
make some minor changes to the kernel comments so they use the
language of loading an 'image' instead of a 'kernel'.

I'll post a V5 kernel series.  Please review and reply with your
ack if you find them acceptable.

-Geoff

^ permalink raw reply	[flat|nested] 65+ messages in thread

end of thread, other threads:[~2014-10-23 23:08 UTC | newest]

Thread overview: 65+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-25  0:23 [PATCH 0/7] arm64 kexec kernel patches V3 Geoff Levand
2014-09-25  0:23 ` [PATCH 3/7] arm64: Add new hcall HVC_CALL_FUNC Geoff Levand
2014-09-25  0:23 ` [PATCH 4/7] arm64: Add EL2 switch to soft_restart Geoff Levand
2014-09-25  0:23 ` [PATCH 2/7] arm64: Convert hcalls to use ISS field Geoff Levand
2014-10-03 10:51   ` Mark Rutland
2014-10-03 21:56     ` Geoff Levand
2014-10-06 10:13       ` Mark Rutland
2014-09-25  0:23 ` [PATCH 1/7] arm64/kvm: Fix assembler compatibility of macros Geoff Levand
2014-10-03 10:26   ` Mark Rutland
2014-10-03 22:27     ` Geoff Levand
2014-10-06 10:10       ` Mark Rutland
2014-09-25  0:23 ` [PATCH 7/7] arm64/kexec: Enable kexec in the arm64 defconfig Geoff Levand
2014-09-25  0:23 ` [PATCH 5/7] arm64: Move proc-macros.S to include/asm Geoff Levand
2014-09-25  0:23 ` [PATCH 6/7] arm64/kexec: Add core kexec support Geoff Levand
2014-09-25 18:28   ` Vivek Goyal
2014-09-25 19:02     ` Geoff Levand
2014-09-25 19:08       ` Vivek Goyal
2014-09-30 18:18   ` Vivek Goyal
2014-09-30 19:54     ` Geoff Levand
2014-10-01 14:56       ` Vivek Goyal
2014-10-03 18:35         ` Geoff Levand
2014-10-07 13:44           ` Vivek Goyal
2014-10-07 18:42             ` Geoff Levand
2014-10-07 18:45               ` Vivek Goyal
2014-10-07 20:12                 ` Geoff Levand
2014-10-07 20:22                   ` Vivek Goyal
2014-10-09 22:26                     ` Geoff Levand
2014-10-23 23:08                       ` Geoff Levand
2014-10-08  9:28                   ` Mark Rutland
2014-10-07 18:48               ` Vivek Goyal
2014-10-01 16:16       ` Mark Rutland
2014-10-01 17:36         ` Vivek Goyal
2014-10-01 17:56           ` Mark Rutland
2014-10-01 17:47         ` Vivek Goyal
2014-10-01 18:03           ` Mark Rutland
2014-10-01 18:09             ` Vivek Goyal
2014-10-01 18:19               ` Mark Rutland
2014-10-01 18:31                 ` Vivek Goyal
2014-10-01 19:22             ` Vivek Goyal
2014-10-02 10:26               ` Mark Rutland
2014-10-02 13:54                 ` Vivek Goyal
2014-10-02 16:53                   ` Mark Rutland
2014-09-30 23:59   ` [PATCH V2 " Geoff Levand
2014-09-30 20:29 ` [PATCH 0/7] arm64 kexec kernel patches V3 Vivek Goyal
2014-09-30 21:27   ` Geoff Levand
2014-10-02 19:08     ` Vivek Goyal
2014-10-02 22:59       ` Geoff Levand
2014-10-07 13:43         ` Vivek Goyal
2014-10-07 14:06           ` Mark Rutland
2014-10-01 15:19 ` Vivek Goyal
2014-10-03 21:16   ` Geoff Levand
2014-10-07 13:40     ` Vivek Goyal
2014-10-07 18:29       ` Geoff Levand
2014-10-08  9:42       ` Mark Rutland
2014-10-09  3:21         ` Dave Young
2014-10-09 10:09           ` Mark Rutland
2014-10-09 10:19             ` Ard Biesheuvel
2014-10-10  5:30             ` Dave Young
2014-10-07 15:22     ` Mark Rutland
2014-10-07 18:28       ` Geoff Levand
2014-10-07 18:09 ` Vivek Goyal
2014-10-07 20:07   ` Geoff Levand
2014-10-08  9:52     ` Leif Lindholm
2014-10-08 10:07       ` Mark Rutland
2014-10-09  9:22 ` Will Deacon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).