All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 07/19] arm64: Add back cpu_reset routines
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Commit 68234df4ea7939f98431aa81113fbdce10c4a84b (arm64: kill flush_cache_all())
removed the global arm64 routines cpu_reset() and cpu_soft_restart() needed by
the arm64 kexec and kdump support.  Add simplified versions of those two
routines back with some changes needed for kexec in the new files cpu-reset.S
and cpu-reset.h.

When a CPU is reset it needs to be put into the exception level it had
when it entered the kernel. Update cpu_soft_restart() to accept an argument
which signals if the reset address needs to be entered at EL1 or EL2.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/cpu-reset.S | 57 +++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpu-reset.h | 29 ++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 arch/arm64/kernel/cpu-reset.S
 create mode 100644 arch/arm64/kernel/cpu-reset.h

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
new file mode 100644
index 0000000..f8d00d5
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -0,0 +1,57 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+.text
+.pushsection    .idmap.text, "ax"
+
+/*
+ * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
+ * cpu_soft_restart.
+ *
+ * @el2_switch: Flag to indicate a switch to EL2 is needed.
+ * @entry: Location to jump to for soft reset.
+ * @arg0: First argument passed to @entry.
+ * @arg1: Second argument passed to @entry.
+ * @arg2: Third argument passed to @entry.
+ *
+ * Put the CPU into the same state as it would be if it had been reset, and
+ * branch to what would be the reset vector. It must be executed with the
+ * flat identity mapping.
+ */
+ENTRY(__cpu_soft_restart)
+	/* Clear sctlr_el1 flags. */
+	mrs	x12, sctlr_el1
+	ldr	x13, =SCTLR_ELx_FLAGS
+	bic	x12, x12, x13
+	msr	sctlr_el1, x12
+	isb
+
+	cbz	x0, 1f				// el2_switch?
+	mov	x0, x1				// entry
+	mov	x1, x2				// arg0
+	mov	x2, x3				// arg1
+	mov	x3, x4				// arg2
+	hvc	#HVC_CALL_FUNC			// no return
+
+1:	mov	x18, x1				// entry
+	mov	x0, x2				// arg0
+	mov	x1, x3				// arg1
+	mov	x2, x4				// arg2
+	ret	x18
+ENDPROC(__cpu_soft_restart)
+
+.popsection
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
new file mode 100644
index 0000000..5a5ea0a
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.h
@@ -0,0 +1,29 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_CPU_RESET_H
+#define _ARM64_CPU_RESET_H
+
+#include <asm/virt.h>
+
+void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
+	unsigned long arg0, unsigned long arg1, unsigned long arg2);
+
+static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
+	unsigned long entry, unsigned long arg0, unsigned long arg1,
+	unsigned long arg2)
+{
+	typeof(__cpu_soft_restart) *restart;
+	restart = (void *)virt_to_phys(__cpu_soft_restart);
+	restart(el2_switch, entry, arg0, arg1, arg2);
+	unreachable();
+}
+
+#endif
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 09/19] Revert "arm64: remove dead code"
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

This reverts commit b08d4640a3dca68670fc5af2fe9205b395a02388.

Add back the setup_mm_for_reboot() needed for kexec.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/mmu.h |  1 +
 arch/arm64/mm/mmu.c          | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a..6326d11 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,6 +29,7 @@ typedef struct {
 #define ASID(mm)	((mm)->context.id.counter & 0xffff)
 
 extern void paging_init(void);
+extern void setup_mm_for_reboot(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
 extern void init_mem_pgprot(void);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 873e363..afcf1ee 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -466,6 +466,17 @@ void __init paging_init(void)
 }
 
 /*
+ * Enable the identity mapping to allow the MMU disabling.
+ */
+void setup_mm_for_reboot(void)
+{
+	cpu_set_reserved_ttbr0();
+	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
+/*
  * Check whether a kernel address is valid (derived from arch/x86/).
  */
 int kern_addr_valid(unsigned long addr)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 08/19] Revert "arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function"
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

This reverts commit c51e97d89e526368eb697f87cd4d391b9e19f369.

Add back the cpu_set_idmap_tcr_t0sz function needed by setup_mm_for_reboot.
---
 arch/arm64/include/asm/mmu_context.h | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 2416578..7567030 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -70,23 +70,34 @@ static inline bool __cpu_uses_extended_idmap(void)
 		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
 }
 
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (__cpu_uses_extended_idmap())
+		asm volatile (
+		"	mrs	%0, tcr_el1	;"
+		"	bfi	%0, %1, %2, %3	;"
+		"	msr	tcr_el1, %0	;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
 static inline void cpu_set_default_tcr_t0sz(void)
 {
-	unsigned long tcr;
-
-	if (!__cpu_uses_extended_idmap())
-		return;
-
-	asm volatile (
-	"	mrs	%0, tcr_el1	;"
-	"	bfi	%0, %1, %2, %3	;"
-	"	msr	tcr_el1, %0	;"
-	"	isb"
-	: "=&r" (tcr)
-	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
 }
 
 /*
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-15 19:18 ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

This series adds the core support for kexec re-boot and kdump on ARM64.  This
version of the series combines Takahiro's kdump patches with my kexec patches.
Please consider all patches for inclusion.

I just tested all the endian combinations of kexec LE->LE, LE->BE, BE->BE,
BE->LE, and both LE and BE kdump, and all work as expected.

To load a second stage kernel and execute a kexec re-boot or to work with kdump
on ARM64 systems a series of patches to kexec-tools [2], which have not yet been
merged upstream, are needed.

To examine vmcore (/proc/vmcore), you should use
  - gdb v7.7 or later
  - crash v7.1.1 or later

[1]  https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
[2]  https://git.kernel.org/cgit/linux/kernel/git/geoff/kexec-tools.git

Changes for v13 (Jan 15, 2016, 20m):

  o Rebase to Linux-4.4.
  o Remove align directive from cpu-reset.S.
  o Use inline C wrapper for cpu_soft_restart.
  o Revert the new image d-cache flush changes of v10.
  o Add SCTLR cleanup patch.
  o Change pr_devel to pr_debug.
  o Call flush_icache_range() for reboot_code_buffer.
  o Add .ltorg directive to arm64_relocate_new_kernel.
  o Make new asm macro copy_page.
  o Change cache maintenance from inner-shareable to non-shareable.
  o Rename KEXEC_ARCH_ARM64 to KEXEC_ARCH_AARCH64.

  o arm64: kvm: allows kvm cpu hotplug
    - remove some garbage code from kvm_host.h
  o arm64: kdump: reserve memory for crash dump kernel
    - change CONFIG_KEXEC to CONFIG_KEXEC_CORE
    - don't panic on crash kernel alloc failure
      (thanks to Mark Salter, RH)
  o arm64: kdump: implement machine_crash_shutdown()
    - change "boot/non-boot cpu" to "crashing/non-crashing cpu"
    - introduce is_in_crash_kexec() for readability
    - re-introduce machine_kexec_mask_interrupts(), as arch/arm has,
      to discard unexpected interrupts
    - call crash_save_cpu() before making cpus offline to avoid a possible race
      (thanks to Pratyush Anand/Mark Salter, RH)
  o arm64: kdump: update a kernel doc
    - clarify that we support "Image" format as well as vmlinux in kdump.txt
  o arm64: kdump: relax BUG_ON() if more than one cpus are still active
    - change a warning message at the failure of shooting down non-crashing cpus

Changes for v12 (Nov 24, 2015, 18m):

  o No changes, rebase to Linux-4.4-rc2.

Changes for v11 (Nov 6, 2015, 18m):

  o Rebase to Linux-4.3.
  o Move the new image d-cache flush from arm64_relocate_new_kernel to machine_kexec.
  o Pass values to arm64_relocate_new_kernel in registers, not in global variables.
  o Fixups to setting the sctlr_el1 and sctlr_el2 flags.

Changes for v10 (Oct 18, 2015, 17m):

  o Rebase to Linux-4.3-rc6.
  o Move tcr_set_idmap_t0sz to assembler.h.
  o Add back simplified cpu_reset routines.
  o Combine kexec + kdump patches.

Changes for v9 (Apr 7, 2015, 11m):

  o Use new upstream flag IND_FLAGS.

Changes for v8 (Mar 19, 2015, 10m):

  o Rebase to Linux-4.0-rc4.
  o Re-boot using purgatory only.

Changes for v7 (Jan 16, 2015, 8m):

  o Rebase to Linux-3.19-rc4.
  o Change from ESR_EL2_ to ESR_ELx_.
  o Remove work-arounds for EFI systems.
  
Changes for v6 (Dec 2, 2014, 7m):

  o Rebase to Linux-3.18-rc2

Changes for v5 (Nov 16, 2014, 6m):

Changes for v4 (Oct 3, 2014, 5m):

Changes for v3 (Sept 23, 2014, 4m):

Changes for v2 (Sep 9, 2014, 4m):

  o Rebase to Linux-3.17-rc4.
  o Move macros from proc-macros.S to assembler.h.
  o Convert hcalls to use ISS field.
  o Add new hcall HVC_CALL_FUNC.
  o Add EL2 switch to soft_restart.

First submission v1 (May 13, 2014):

  o Based on Linux-3.15-rc4.

-Geoff

The following changes since commit afd2ff9b7e1b367172f18ba7f693dfb62bdcb2dc:

  Linux 4.4 (2016-01-10 15:01:32 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/geoff/linux-kexec.git kexec-v13

for you to fetch changes up to e52b39c76d2a967bdecc376daee07d6edd47b01a:

  arm64: kdump: relax BUG_ON() if more than one cpus are still active (2016-01-15 10:22:45 -0800)

----------------------------------------------------------------
AKASHI Takahiro (7):
      arm64: kvm: allows kvm cpu hotplug
      arm64: kdump: reserve memory for crash dump kernel
      arm64: kdump: implement machine_crash_shutdown()
      arm64: kdump: add kdump support
      arm64: kdump: enable kdump in the arm64 defconfig
      arm64: kdump: update a kernel doc
      arm64: kdump: relax BUG_ON() if more than one cpus are still active

Geoff Levand (11):
      arm64: Fold proc-macros.S into assembler.h
      arm64: Add new asm macro copy_page
      arm64: Cleanup SCTLR flags
      arm64: Convert hcalls to use HVC immediate value
      arm64: Add new hcall HVC_CALL_FUNC
      arm64: Add back cpu_reset routines
      Revert "arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function"
      Revert "arm64: remove dead code"
      arm64/kexec: Add core kexec support
      arm64/kexec: Enable kexec in the arm64 defconfig
      arm64/kexec: Add pr_debug output

James Morse (1):
      arm64: kernel: Include _AC definition in page.h

 Documentation/kdump/kdump.txt        |  23 ++-
 arch/arm/include/asm/kvm_host.h      |  10 +-
 arch/arm/include/asm/kvm_mmu.h       |   1 +
 arch/arm/kvm/arm.c                   |  93 +++++++-----
 arch/arm/kvm/mmu.c                   |   5 +
 arch/arm64/Kconfig                   |  21 +++
 arch/arm64/configs/defconfig         |   2 +
 arch/arm64/include/asm/assembler.h   |  67 +++++++-
 arch/arm64/include/asm/kexec.h       |  93 ++++++++++++
 arch/arm64/include/asm/kvm_arm.h     |  11 --
 arch/arm64/include/asm/kvm_host.h    |  11 +-
 arch/arm64/include/asm/kvm_mmu.h     |   1 +
 arch/arm64/include/asm/mmu.h         |   1 +
 arch/arm64/include/asm/mmu_context.h |  35 +++--
 arch/arm64/include/asm/page.h        |   2 +
 arch/arm64/include/asm/sysreg.h      |  19 ++-
 arch/arm64/include/asm/virt.h        |  49 ++++++
 arch/arm64/kernel/Makefile           |   3 +
 arch/arm64/kernel/cpu-reset.S        |  57 +++++++
 arch/arm64/kernel/cpu-reset.h        |  29 ++++
 arch/arm64/kernel/crash_dump.c       |  71 +++++++++
 arch/arm64/kernel/hyp-stub.S         |  43 ++++--
 arch/arm64/kernel/machine_kexec.c    | 285 +++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S  | 131 ++++++++++++++++
 arch/arm64/kernel/setup.c            |   7 +-
 arch/arm64/kernel/smp.c              |  18 ++-
 arch/arm64/kvm/hyp-init.S            |  39 ++++-
 arch/arm64/kvm/hyp.S                 |  44 ++++--
 arch/arm64/mm/cache.S                |   2 -
 arch/arm64/mm/init.c                 |  89 +++++++++++
 arch/arm64/mm/mmu.c                  |  11 ++
 arch/arm64/mm/proc-macros.S          |  64 --------
 arch/arm64/mm/proc.S                 |   3 -
 include/uapi/linux/kexec.h           |   1 +
 34 files changed, 1175 insertions(+), 166 deletions(-)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/cpu-reset.S
 create mode 100644 arch/arm64/kernel/cpu-reset.h
 create mode 100644 arch/arm64/kernel/crash_dump.c
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S
 delete mode 100644 arch/arm64/mm/proc-macros.S

-- 
2.5.0

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 03/19] arm64: Add new asm macro copy_page
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/assembler.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 21979a4..c47a623 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -24,6 +24,7 @@
 #define __ASM_ASSEMBLER_H
 
 #include <asm/asm-offsets.h>
+#include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
@@ -240,6 +241,24 @@ lr	.req	x30		// link register
 	.endm
 
 /*
+ * copy_page - copy src to dest using temp registers t1-t8
+ */
+	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
+1:	ldp	\t1, \t2, [\src]
+	ldp	\t3, \t4, [\src, #16]
+	ldp	\t5, \t6, [\src, #32]
+	ldp	\t7, \t8, [\src, #48]
+	add	\src, \src, #64
+	stnp	\t1, \t2, [\dest]
+	stnp	\t3, \t4, [\dest, #16]
+	stnp	\t5, \t6, [\dest, #32]
+	stnp	\t7, \t8, [\dest, #48]
+	add	\dest, \dest, #64
+	tst	\src, #(PAGE_SIZE - 1)
+	b.ne	1b
+	.endm
+
+/*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
  */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 06/19] arm64: Add new hcall HVC_CALL_FUNC
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
During CPU reset the CPU must be brought to the exception level it had on
entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
needed for this exception level switch.

To allow the HVC_CALL_FUNC exception vector to work without a stack, which is
needed to support an hcall at CPU reset, this implementation uses register x18
to store the link register across the caller provided function.  This dictates
that the caller provided function must preserve the contents of register x18.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 13 +++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 13 ++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index eb10368..3070096 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -45,6 +45,19 @@
 
 #define HVC_SET_VECTORS 2
 
+/*
+ * HVC_CALL_FUNC - Execute a function at EL2.
+ *
+ * @x0: Physical address of the function to be executed.
+ * @x1: Passed as the first argument to the function.
+ * @x2: Passed as the second argument to the function.
+ * @x3: Passed as the third argument to the function.
+ *
+ * The called function must preserve the contents of register x18.
+ */
+
+#define HVC_CALL_FUNC 3
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 017ab519..e8febe9 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -67,8 +67,19 @@ el1_sync:
 	b	2f
 
 1:	cmp	x18, #HVC_SET_VECTORS
-	b.ne	2f
+	b.ne	1f
 	msr	vbar_el2, x0
+	b	2f
+
+1:	cmp	x18, #HVC_CALL_FUNC
+	b.ne	2f
+	mov	x18, lr
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+	mov	lr, x18
 
 2:	eret
 ENDPROC(el1_sync)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 04/19] arm64: Cleanup SCTLR flags
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

We currently have macros defining flags for the arm64 sctlr registers in both
kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
indicating a common flag, and fixup all files to include the proper header or
to use the new macro names.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/kvm_arm.h | 11 -----------
 arch/arm64/include/asm/sysreg.h  | 19 +++++++++++++++----
 arch/arm64/kvm/hyp-init.S        |  6 +++---
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 5e6857b..92ef6f6 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -83,17 +83,6 @@
 #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
 
 
-/* Hyp System Control Register (SCTLR_EL2) bits */
-#define SCTLR_EL2_EE	(1 << 25)
-#define SCTLR_EL2_WXN	(1 << 19)
-#define SCTLR_EL2_I	(1 << 12)
-#define SCTLR_EL2_SA	(1 << 3)
-#define SCTLR_EL2_C	(1 << 2)
-#define SCTLR_EL2_A	(1 << 1)
-#define SCTLR_EL2_M	1
-#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
-			 SCTLR_EL2_SA | SCTLR_EL2_I)
-
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
 #define TCR_EL2_TBI	(1 << 20)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d48ab5b..109d46e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -80,10 +80,21 @@
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
 
-/* SCTLR_EL1 */
-#define SCTLR_EL1_CP15BEN	(0x1 << 5)
-#define SCTLR_EL1_SED		(0x1 << 8)
-#define SCTLR_EL1_SPAN		(0x1 << 23)
+/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_EE    (1 << 25)
+#define SCTLR_ELx_I	(1 << 12)
+#define SCTLR_ELx_SA	(1 << 3)
+#define SCTLR_ELx_C	(1 << 2)
+#define SCTLR_ELx_A	(1 << 1)
+#define SCTLR_ELx_M	1
+
+#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+			 SCTLR_ELx_SA | SCTLR_ELx_I)
+
+/* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_SPAN		(1 << 23)
+#define SCTLR_EL1_SED		(1 << 8)
+#define SCTLR_EL1_CP15BEN	(1 << 5)
 
 
 /* id_aa64isar0 */
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 178ba22..1d7e502 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,7 +20,7 @@
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
-#include <asm/pgtable-hwdef.h>
+#include <asm/sysreg.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -105,8 +105,8 @@ __do_hyp_init:
 	dsb	sy
 
 	mrs	x4, sctlr_el2
-	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
-	ldr	x5, =SCTLR_EL2_FLAGS
+	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
+	ldr	x5, =SCTLR_ELx_FLAGS
 	orr	x4, x4, x5
 	msr	sctlr_el2, x4
 	isb
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 02/19] arm64: kernel: Include _AC definition in page.h
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: James Morse <james.morse@arm.com>

page.h uses '_AC' in the definition of PAGE_SIZE, but doesn't include
linux/const.h where this is defined. This produces build warnings when only
asm/page.h is included by asm code.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/page.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 9b2f5a9..fbafd0a 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,6 +19,8 @@
 #ifndef __ASM_PAGE_H
 #define __ASM_PAGE_H
 
+#include <linux/const.h>
+
 /* PAGE_SHIFT determines the page size */
 /* CONT_SHIFT determines the number of pages which can be tracked together  */
 #ifdef CONFIG_ARM64_64K_PAGES
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 05/19] arm64: Convert hcalls to use HVC immediate value
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

The existing arm64 hcall implementations are limited in that they only allow
for two distinct hcalls; with the x0 register either zero or not zero.  Also,
the API of the hyp-stub exception vector routines and the KVM exception vector
routines differ; hyp-stub uses a non-zero value in x0 to implement
__hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp.

To allow for additional hcalls to be defined and to make the arm64 hcall API
more consistent across exception vector routines, change the hcall
implementations to use the 16 bit immediate value of the HVC instruction to
specify the hcall type.

Define three new preprocessor macros HVC_CALL_HYP, HVC_GET_VECTORS, and
HVC_SET_VECTORS to be used as hcall type specifiers and convert the
existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
to use these new macros when executing an HVC call.  Also, change the
corresponding hyp-stub and KVM el1_sync exception vector routines to use these
new macros.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 27 +++++++++++++++++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 32 +++++++++++++++++++++-----------
 arch/arm64/kvm/hyp.S          | 16 +++++++++-------
 3 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df52..eb10368 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -18,6 +18,33 @@
 #ifndef __ASM__VIRT_H
 #define __ASM__VIRT_H
 
+/*
+ * The arm64 hcall implementation uses the ISS field of the ESR_EL2 register to
+ * specify the hcall type.  The exception handlers are allowed to use registers
+ * x17 and x18 in their implementation.  Any routine issuing an hcall must not
+ * expect these registers to be preserved.
+ */
+
+/*
+ * HVC_CALL_HYP - Execute a hyp routine.
+ */
+
+#define HVC_CALL_HYP 0
+
+/*
+ * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
+ */
+
+#define HVC_GET_VECTORS 1
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x0: Physical address of the new vector table.
+ */
+
+#define HVC_SET_VECTORS 2
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..017ab519 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -22,6 +22,7 @@
 #include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
+#include <asm/kvm_arm.h>
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
@@ -53,14 +54,22 @@ ENDPROC(__hyp_stub_vectors)
 	.align 11
 
 el1_sync:
-	mrs	x1, esr_el2
-	lsr	x1, x1, #26
-	cmp	x1, #0x16
+	mrs	x18, esr_el2
+	lsr	x17, x18, #ESR_ELx_EC_SHIFT
+	and	x18, x18, #ESR_ELx_ISS_MASK
+
+	cmp	x17, #ESR_ELx_EC_HVC64
 	b.ne	2f				// Not an HVC trap
-	cbz	x0, 1f
-	msr	vbar_el2, x0			// Set vbar_el2
+
+	cmp	x18, #HVC_GET_VECTORS
+	b.ne	1f
+	mrs	x0, vbar_el2
 	b	2f
-1:	mrs	x0, vbar_el2			// Return vbar_el2
+
+1:	cmp	x18, #HVC_SET_VECTORS
+	b.ne	2f
+	msr	vbar_el2, x0
+
 2:	eret
 ENDPROC(el1_sync)
 
@@ -100,11 +109,12 @@ ENDPROC(\label)
  * initialisation entry point.
  */
 
-ENTRY(__hyp_get_vectors)
-	mov	x0, xzr
-	// fall through
 ENTRY(__hyp_set_vectors)
-	hvc	#0
+	hvc	#HVC_SET_VECTORS
 	ret
-ENDPROC(__hyp_get_vectors)
 ENDPROC(__hyp_set_vectors)
+
+ENTRY(__hyp_get_vectors)
+	hvc	#HVC_GET_VECTORS
+	ret
+ENDPROC(__hyp_get_vectors)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 86c2898..15b1ef9 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -29,6 +29,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/memory.h>
+#include <asm/virt.h>
 
 #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
@@ -936,12 +937,9 @@ __hyp_panic_str:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
- * A function pointer with a value of 0 has a special meaning, and is
- * used to implement __hyp_get_vectors in the same way as in
- * arch/arm64/kernel/hyp_stub.S.
  */
 ENTRY(kvm_call_hyp)
-	hvc	#0
+	hvc	#HVC_CALL_HYP
 	ret
 ENDPROC(kvm_call_hyp)
 
@@ -972,6 +970,7 @@ el1_sync:					// Guest trapped into EL2
 
 	mrs	x1, esr_el2
 	lsr	x2, x1, #ESR_ELx_EC_SHIFT
+	and	x0, x1, #ESR_ELx_ISS_MASK
 
 	cmp	x2, #ESR_ELx_EC_HVC64
 	b.ne	el1_trap
@@ -980,15 +979,18 @@ el1_sync:					// Guest trapped into EL2
 	cbnz	x3, el1_trap			// called HVC
 
 	/* Here, we're pretty sure the host called HVC. */
+	mov	x18, x0
 	pop	x2, x3
 	pop	x0, x1
 
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
+	cmp	x18, #HVC_GET_VECTORS
+	b.ne	1f
 	mrs	x0, vbar_el2
 	b	2f
 
-1:	push	lr, xzr
+1:	/* Default to HVC_CALL_HYP. */
+
+	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 01/19] arm64: Fold proc-macros.S into assembler.h
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

To allow the assembler macros defined in arch/arm64/mm/proc-macros.S to be used
outside the mm code move the contents of proc-macros.S to asm/assembler.h.  Also,
delete proc-macros.S, and fix up all references to proc-macros.S.

Signed-off-by: Geoff Levand <geoff@infradead.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 arch/arm64/include/asm/assembler.h | 48 +++++++++++++++++++++++++++-
 arch/arm64/mm/cache.S              |  2 --
 arch/arm64/mm/proc-macros.S        | 64 --------------------------------------
 arch/arm64/mm/proc.S               |  3 --
 4 files changed, 47 insertions(+), 70 deletions(-)
 delete mode 100644 arch/arm64/mm/proc-macros.S

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 12eff92..21979a4 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -1,5 +1,5 @@
 /*
- * Based on arch/arm/include/asm/assembler.h
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
  *
  * Copyright (C) 1996-2000 Russell King
  * Copyright (C) 2012 ARM Ltd.
@@ -23,6 +23,8 @@
 #ifndef __ASM_ASSEMBLER_H
 #define __ASM_ASSEMBLER_H
 
+#include <asm/asm-offsets.h>
+#include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
@@ -194,6 +196,50 @@ lr	.req	x30		// link register
 	.endm
 
 /*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+	.macro	vma_vm_mm, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_MM]
+	.endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+	.macro	mmid, rd, rn
+	ldr	\rd, [\rn, #MM_CONTEXT_ID]
+	.endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	and	\tmp, \tmp, #0xf		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	ldr_l	\tmpreg, idmap_t0sz
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
+
+/*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
  */
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index cfa44a6..f49041d 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,8 +24,6 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 
-#include "proc-macros.S"
-
 /*
  *	flush_icache_range(start,end)
  *
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index 4c4d93c..0000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-	.macro	vma_vm_mm, rd, rn
-	ldr	\rd, [\rn, #VMA_VM_MM]
-	.endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
-	.macro	mmid, rd, rn
-	ldr	\rd, [\rn, #MM_CONTEXT_ID]
-	.endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
-	.macro	dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
-	.macro	icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
- */
-	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
-	ldr_l	\tmpreg, idmap_t0sz
-	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
-	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index cacecc4..7ab3a90 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,11 +23,8 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 
-#include "proc-macros.S"
-
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
 #elif defined(CONFIG_ARM64_16K_PAGES)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 06/19] arm64: Add new hcall HVC_CALL_FUNC
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

Add the new hcall HVC_CALL_FUNC that allows execution of a function at EL2.
During CPU reset the CPU must be brought to the exception level it had on
entry to the kernel.  The HVC_CALL_FUNC hcall will provide the mechanism
needed for this exception level switch.

To allow the HVC_CALL_FUNC exception vector to work without a stack, which is
needed to support an hcall at CPU reset, this implementation uses register x18
to store the link register across the caller provided function.  This dictates
that the caller provided function must preserve the contents of register x18.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 13 +++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 13 ++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index eb10368..3070096 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -45,6 +45,19 @@
 
 #define HVC_SET_VECTORS 2
 
+/*
+ * HVC_CALL_FUNC - Execute a function at EL2.
+ *
+ * @x0: Physical address of the function to be executed.
+ * @x1: Passed as the first argument to the function.
+ * @x2: Passed as the second argument to the function.
+ * @x3: Passed as the third argument to the function.
+ *
+ * The called function must preserve the contents of register x18.
+ */
+
+#define HVC_CALL_FUNC 3
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 017ab519..e8febe9 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -67,8 +67,19 @@ el1_sync:
 	b	2f
 
 1:	cmp	x18, #HVC_SET_VECTORS
-	b.ne	2f
+	b.ne	1f
 	msr	vbar_el2, x0
+	b	2f
+
+1:	cmp	x18, #HVC_CALL_FUNC
+	b.ne	2f
+	mov	x18, lr
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+	mov	lr, x18
 
 2:	eret
 ENDPROC(el1_sync)
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 08/19] Revert "arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function"
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

This reverts commit c51e97d89e526368eb697f87cd4d391b9e19f369.

Add back the cpu_set_idmap_tcr_t0sz() function needed by setup_mm_for_reboot().
---
 arch/arm64/include/asm/mmu_context.h | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 2416578..7567030 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -70,23 +70,34 @@ static inline bool __cpu_uses_extended_idmap(void)
 		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
 }
 
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (__cpu_uses_extended_idmap())
+		asm volatile (
+		"	mrs	%0, tcr_el1	;"
+		"	bfi	%0, %1, %2, %3	;"
+		"	msr	tcr_el1, %0	;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
 static inline void cpu_set_default_tcr_t0sz(void)
 {
-	unsigned long tcr;
-
-	if (!__cpu_uses_extended_idmap())
-		return;
-
-	asm volatile (
-	"	mrs	%0, tcr_el1	;"
-	"	bfi	%0, %1, %2, %3	;"
-	"	msr	tcr_el1, %0	;"
-	"	isb"
-	: "=&r" (tcr)
-	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
 }
 
 /*
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 09/19] Revert "arm64: remove dead code"
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

This reverts commit b08d4640a3dca68670fc5af2fe9205b395a02388.

Add back the setup_mm_for_reboot() needed for kexec.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/mmu.h |  1 +
 arch/arm64/mm/mmu.c          | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a..6326d11 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,6 +29,7 @@ typedef struct {
 #define ASID(mm)	((mm)->context.id.counter & 0xffff)
 
 extern void paging_init(void);
+extern void setup_mm_for_reboot(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
 extern void init_mem_pgprot(void);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 873e363..afcf1ee 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -466,6 +466,17 @@ void __init paging_init(void)
 }
 
 /*
+ * Enable the identity mapping to allow the MMU disabling.
+ */
+void setup_mm_for_reboot(void)
+{
+	cpu_set_reserved_ttbr0();
+	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
+/*
  * Check whether a kernel address is valid (derived from arch/x86/).
  */
 int kern_addr_valid(unsigned long addr)
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 07/19] arm64: Add back cpu_reset routines
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

Commit 68234df4ea7939f98431aa81113fbdce10c4a84b (arm64: kill flush_cache_all())
removed the global arm64 routines cpu_reset() and cpu_soft_restart() needed by
the arm64 kexec and kdump support.  Add simplified versions of those two
routines back with some changes needed for kexec in the new files cpu-reset.S
and cpu-reset.h.

When a CPU is reset it needs to be put into the exception level it had
when it entered the kernel. Update cpu_soft_restart() to accept an argument
which signals whether the reset address needs to be entered at EL1 or EL2.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/cpu-reset.S | 57 +++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpu-reset.h | 29 ++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 arch/arm64/kernel/cpu-reset.S
 create mode 100644 arch/arm64/kernel/cpu-reset.h

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
new file mode 100644
index 0000000..f8d00d5
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -0,0 +1,57 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+.text
+.pushsection    .idmap.text, "ax"
+
+/*
+ * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
+ * cpu_soft_restart.
+ *
+ * @el2_switch: Flag to indicate a switch to EL2 is needed.
+ * @entry: Location to jump to for soft reset.
+ * @arg0: First argument passed to @entry.
+ * @arg1: Second argument passed to @entry.
+ * @arg2: Third argument passed to @entry.
+ *
+ * Put the CPU into the same state as it would be if it had been reset, and
+ * branch to what would be the reset vector. It must be executed with the
+ * flat identity mapping.
+ */
+ENTRY(__cpu_soft_restart)
+	/* Clear sctlr_el1 flags. */
+	mrs	x12, sctlr_el1
+	ldr	x13, =SCTLR_ELx_FLAGS
+	bic	x12, x12, x13
+	msr	sctlr_el1, x12
+	isb
+
+	cbz	x0, 1f				// el2_switch?
+	mov	x0, x1				// entry
+	mov	x1, x2				// arg0
+	mov	x2, x3				// arg1
+	mov	x3, x4				// arg2
+	hvc	#HVC_CALL_FUNC			// no return
+
+1:	mov	x18, x1				// entry
+	mov	x0, x2				// arg0
+	mov	x1, x3				// arg1
+	mov	x2, x4				// arg2
+	ret	x18
+ENDPROC(__cpu_soft_restart)
+
+.popsection
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
new file mode 100644
index 0000000..5a5ea0a
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.h
@@ -0,0 +1,29 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_CPU_RESET_H
+#define _ARM64_CPU_RESET_H
+
+#include <asm/virt.h>
+
+void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
+	unsigned long arg0, unsigned long arg1, unsigned long arg2);
+
+static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
+	unsigned long entry, unsigned long arg0, unsigned long arg1,
+	unsigned long arg2)
+{
+	typeof(__cpu_soft_restart) *restart;
+	restart = (void *)virt_to_phys(__cpu_soft_restart);
+	restart(el2_switch, entry, arg0, arg1, arg2);
+	unreachable();
+}
+
+#endif
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-15 19:18 ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

This series adds the core support for kexec re-boot and kdump on ARM64.  This
version of the series combines Takahiro's kdump patches with my kexec patches.
Please consider all patches for inclusion.

I just tested all the endian combinations of kexec LE->LE, LE->BE, BE->BE,
BE->LE, and both LE and BE kdump, and all work as expected.

To load a second stage kernel and execute a kexec re-boot or to work with kdump
on ARM64 systems a series of patches to kexec-tools [2], which have not yet been
merged upstream, are needed.

To examine vmcore (/proc/vmcore), you should use
  - gdb v7.7 or later
  - crash v7.1.1 or later

[1]  https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
[2]  https://git.kernel.org/cgit/linux/kernel/git/geoff/kexec-tools.git

Changes for v13 (Jan 15, 2016, 20m):

  o Rebase to Linux-4.4.
  o Remove align directive from cpu-reset.S.
  o Use inline C wrapper for cpu_soft_restart.
  o Revert the new image d-cache flush changes of v10.
  o Add SCTLR cleanup patch.
  o Change pr_devel to pr_debug.
  o Call flush_icache_range() for reboot_code_buffer.
  o Add .ltorg directive to arm64_relocate_new_kernel.
  o Make new asm macro copy_page.
  o Change cache maintenance from inner-shareable to non-shareable.
  o Rename KEXEC_ARCH_ARM64 to KEXEC_ARCH_AARCH64.

  o arm64: kvm: allows kvm cpu hotplug
    - remove some garbage code from kvm_host.h
  o arm64: kdump: reserve memory for crash dump kernel
    - change CONFIG_KEXEC to CONFIG_KEXEC_CORE
    - don't panic on crash kernel alloc failure
      (thanks to Mark Salter, RH)
  o arm64: kdump: implement machine_crash_shutdown()
    - change "boot/non-boot cpu" to "crashing/non-crashing cpu"
    - introduce is_in_crash_kexec() for readability
    - re-introduce machine_kexec_mask_interrupts(), as arch/arm has,
      to discard unexpected interrupts
    - call crash_save_cpu() before making cpus offline to avoid a possible race
      (thanks to Pratyush Anand/Mark Salter, RH)
  o arm64: kdump: update a kernel doc
    - clarify that we support "Image" format as well as vmlinux in kdump.txt
  o arm64: kdump: relax BUG_ON() if more than one cpus are still active
    - change a warning message at the failure of shooting down non-crashing cpus

Changes for v12 (Nov 24, 2015, 18m):

  o No changes, rebase to Linux-4.4-rc2.

Changes for v11 (Nov 6, 2015, 18m):

  o Rebase to Linux-4.3.
  o Move the new image d-cache flush from arm64_relocate_new_kernel to machine_kexec.
  o Pass values to arm64_relocate_new_kernel in registers, not in global variables.
  o Fixups to setting the sctlr_el1 and sctlr_el2 flags.

Changes for v10 (Oct 18, 2015, 17m):

  o Rebase to Linux-4.3-rc6.
  o Move tcr_set_idmap_t0sz to assembler.h.
  o Add back simplified cpu_reset routines.
  o Combine kexec + kdump patches.

Changes for v9 (Apr 7, 2015, 11m):

  o Use new upstream flag IND_FLAGS.

Changes for v8 (Mar 19, 2015, 10m):

  o Rebase to Linux-4.0-rc4.
  o Re-boot using purgatory only.

Changes for v7 (Jan 16, 2015, 8m):

  o Rebase to Linux-3.19-rc4.
  o Change from ESR_EL2_ to ESR_ELx_.
  o Remove work-arounds for EFI systems.
  
Changes for v6 (Dec 2, 2014, 7m):

  o Rebase to Linux-3.18-rc2

Changes for v5 (Nov 16, 2014, 6m):

Changes for v4 (Oct 3, 2014, 5m):

Changes for v3 (Sept 23, 2014, 4m):

Changes for v2 (Sep 9, 2014, 4m):

  o Rebase to Linux-3.17-rc4.
  o Move macros from proc-macros.S to assembler.h.
  o Convert hcalls to use ISS field.
  o Add new hcall HVC_CALL_FUNC.
  o Add EL2 switch to soft_restart.

First submission v1 (May 13, 2014):

  o Based on Linux-3.15-rc4.

-Geoff

The following changes since commit afd2ff9b7e1b367172f18ba7f693dfb62bdcb2dc:

  Linux 4.4 (2016-01-10 15:01:32 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/geoff/linux-kexec.git kexec-v13

for you to fetch changes up to e52b39c76d2a967bdecc376daee07d6edd47b01a:

  arm64: kdump: relax BUG_ON() if more than one cpus are still active (2016-01-15 10:22:45 -0800)

----------------------------------------------------------------
AKASHI Takahiro (7):
      arm64: kvm: allows kvm cpu hotplug
      arm64: kdump: reserve memory for crash dump kernel
      arm64: kdump: implement machine_crash_shutdown()
      arm64: kdump: add kdump support
      arm64: kdump: enable kdump in the arm64 defconfig
      arm64: kdump: update a kernel doc
      arm64: kdump: relax BUG_ON() if more than one cpus are still active

Geoff Levand (11):
      arm64: Fold proc-macros.S into assembler.h
      arm64: Add new asm macro copy_page
      arm64: Cleanup SCTLR flags
      arm64: Convert hcalls to use HVC immediate value
      arm64: Add new hcall HVC_CALL_FUNC
      arm64: Add back cpu_reset routines
      Revert "arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function"
      Revert "arm64: remove dead code"
      arm64/kexec: Add core kexec support
      arm64/kexec: Enable kexec in the arm64 defconfig
      arm64/kexec: Add pr_debug output

James Morse (1):
      arm64: kernel: Include _AC definition in page.h

 Documentation/kdump/kdump.txt        |  23 ++-
 arch/arm/include/asm/kvm_host.h      |  10 +-
 arch/arm/include/asm/kvm_mmu.h       |   1 +
 arch/arm/kvm/arm.c                   |  93 +++++++-----
 arch/arm/kvm/mmu.c                   |   5 +
 arch/arm64/Kconfig                   |  21 +++
 arch/arm64/configs/defconfig         |   2 +
 arch/arm64/include/asm/assembler.h   |  67 +++++++-
 arch/arm64/include/asm/kexec.h       |  93 ++++++++++++
 arch/arm64/include/asm/kvm_arm.h     |  11 --
 arch/arm64/include/asm/kvm_host.h    |  11 +-
 arch/arm64/include/asm/kvm_mmu.h     |   1 +
 arch/arm64/include/asm/mmu.h         |   1 +
 arch/arm64/include/asm/mmu_context.h |  35 +++--
 arch/arm64/include/asm/page.h        |   2 +
 arch/arm64/include/asm/sysreg.h      |  19 ++-
 arch/arm64/include/asm/virt.h        |  49 ++++++
 arch/arm64/kernel/Makefile           |   3 +
 arch/arm64/kernel/cpu-reset.S        |  57 +++++++
 arch/arm64/kernel/cpu-reset.h        |  29 ++++
 arch/arm64/kernel/crash_dump.c       |  71 +++++++++
 arch/arm64/kernel/hyp-stub.S         |  43 ++++--
 arch/arm64/kernel/machine_kexec.c    | 285 +++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S  | 131 ++++++++++++++++
 arch/arm64/kernel/setup.c            |   7 +-
 arch/arm64/kernel/smp.c              |  18 ++-
 arch/arm64/kvm/hyp-init.S            |  39 ++++-
 arch/arm64/kvm/hyp.S                 |  44 ++++--
 arch/arm64/mm/cache.S                |   2 -
 arch/arm64/mm/init.c                 |  89 +++++++++++
 arch/arm64/mm/mmu.c                  |  11 ++
 arch/arm64/mm/proc-macros.S          |  64 --------
 arch/arm64/mm/proc.S                 |   3 -
 include/uapi/linux/kexec.h           |   1 +
 34 files changed, 1175 insertions(+), 166 deletions(-)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/cpu-reset.S
 create mode 100644 arch/arm64/kernel/cpu-reset.h
 create mode 100644 arch/arm64/kernel/crash_dump.c
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S
 delete mode 100644 arch/arm64/mm/proc-macros.S

-- 
2.5.0


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 03/19] arm64: Add new asm macro copy_page
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/assembler.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 21979a4..c47a623 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -24,6 +24,7 @@
 #define __ASM_ASSEMBLER_H
 
 #include <asm/asm-offsets.h>
+#include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
@@ -240,6 +241,24 @@ lr	.req	x30		// link register
 	.endm
 
 /*
+ * copy_page - copy src to dest using temp registers t1-t8
+ */
+	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
+1:	ldp	\t1, \t2, [\src]
+	ldp	\t3, \t4, [\src, #16]
+	ldp	\t5, \t6, [\src, #32]
+	ldp	\t7, \t8, [\src, #48]
+	add	\src, \src, #64
+	stnp	\t1, \t2, [\dest]
+	stnp	\t3, \t4, [\dest, #16]
+	stnp	\t5, \t6, [\dest, #32]
+	stnp	\t7, \t8, [\dest, #48]
+	add	\dest, \dest, #64
+	tst	\src, #(PAGE_SIZE - 1)
+	b.ne	1b
+	.endm
+
+/*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
  */
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 04/19] arm64: Cleanup SCTLR flags
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

We currently have macros defining flags for the arm64 sctlr registers in both
kvm_arm.h and sysreg.h.  To clean things up and simplify, move the definitions
of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
indicating a common flag, and fixup all files to include the proper header or
to use the new macro names.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/kvm_arm.h | 11 -----------
 arch/arm64/include/asm/sysreg.h  | 19 +++++++++++++++----
 arch/arm64/kvm/hyp-init.S        |  6 +++---
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 5e6857b..92ef6f6 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -83,17 +83,6 @@
 #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
 
 
-/* Hyp System Control Register (SCTLR_EL2) bits */
-#define SCTLR_EL2_EE	(1 << 25)
-#define SCTLR_EL2_WXN	(1 << 19)
-#define SCTLR_EL2_I	(1 << 12)
-#define SCTLR_EL2_SA	(1 << 3)
-#define SCTLR_EL2_C	(1 << 2)
-#define SCTLR_EL2_A	(1 << 1)
-#define SCTLR_EL2_M	1
-#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
-			 SCTLR_EL2_SA | SCTLR_EL2_I)
-
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
 #define TCR_EL2_TBI	(1 << 20)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d48ab5b..109d46e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -80,10 +80,21 @@
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
 
-/* SCTLR_EL1 */
-#define SCTLR_EL1_CP15BEN	(0x1 << 5)
-#define SCTLR_EL1_SED		(0x1 << 8)
-#define SCTLR_EL1_SPAN		(0x1 << 23)
+/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_EE    (1 << 25)
+#define SCTLR_ELx_I	(1 << 12)
+#define SCTLR_ELx_SA	(1 << 3)
+#define SCTLR_ELx_C	(1 << 2)
+#define SCTLR_ELx_A	(1 << 1)
+#define SCTLR_ELx_M	1
+
+#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+			 SCTLR_ELx_SA | SCTLR_ELx_I)
+
+/* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_SPAN		(1 << 23)
+#define SCTLR_EL1_SED		(1 << 8)
+#define SCTLR_EL1_CP15BEN	(1 << 5)
 
 
 /* id_aa64isar0 */
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 178ba22..1d7e502 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,7 +20,7 @@
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
-#include <asm/pgtable-hwdef.h>
+#include <asm/sysreg.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -105,8 +105,8 @@ __do_hyp_init:
 	dsb	sy
 
 	mrs	x4, sctlr_el2
-	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
-	ldr	x5, =SCTLR_EL2_FLAGS
+	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
+	ldr	x5, =SCTLR_ELx_FLAGS
 	orr	x4, x4, x5
 	msr	sctlr_el2, x4
 	isb
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 01/19] arm64: Fold proc-macros.S into assembler.h
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

To allow the assembler macros defined in arch/arm64/mm/proc-macros.S to be used
outside the mm code, move the contents of proc-macros.S to asm/assembler.h.  Also,
delete proc-macros.S, and fix up all references to proc-macros.S.

Signed-off-by: Geoff Levand <geoff@infradead.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 arch/arm64/include/asm/assembler.h | 48 +++++++++++++++++++++++++++-
 arch/arm64/mm/cache.S              |  2 --
 arch/arm64/mm/proc-macros.S        | 64 --------------------------------------
 arch/arm64/mm/proc.S               |  3 --
 4 files changed, 47 insertions(+), 70 deletions(-)
 delete mode 100644 arch/arm64/mm/proc-macros.S

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 12eff92..21979a4 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -1,5 +1,5 @@
 /*
- * Based on arch/arm/include/asm/assembler.h
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
  *
  * Copyright (C) 1996-2000 Russell King
  * Copyright (C) 2012 ARM Ltd.
@@ -23,6 +23,8 @@
 #ifndef __ASM_ASSEMBLER_H
 #define __ASM_ASSEMBLER_H
 
+#include <asm/asm-offsets.h>
+#include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
@@ -194,6 +196,50 @@ lr	.req	x30		// link register
 	.endm
 
 /*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+	.macro	vma_vm_mm, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_MM]
+	.endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+	.macro	mmid, rd, rn
+	ldr	\rd, [\rn, #MM_CONTEXT_ID]
+	.endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	and	\tmp, \tmp, #0xf		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	ldr_l	\tmpreg, idmap_t0sz
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
+
+/*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
  */
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index cfa44a6..f49041d 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,8 +24,6 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 
-#include "proc-macros.S"
-
 /*
  *	flush_icache_range(start,end)
  *
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index 4c4d93c..0000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-	.macro	vma_vm_mm, rd, rn
-	ldr	\rd, [\rn, #VMA_VM_MM]
-	.endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
-	.macro	mmid, rd, rn
-	ldr	\rd, [\rn, #MM_CONTEXT_ID]
-	.endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
-	.macro	dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
-	.macro	icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
- */
-	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
-	ldr_l	\tmpreg, idmap_t0sz
-	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
-	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index cacecc4..7ab3a90 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,11 +23,8 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 
-#include "proc-macros.S"
-
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
 #elif defined(CONFIG_ARM64_16K_PAGES)
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 05/19] arm64: Convert hcalls to use HVC immediate value
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

The existing arm64 hcall implementations are limited in that they only allow
for two distinct hcalls, selected by x0 being zero or non-zero.  Also,
the API of the hyp-stub exception vector routines and the KVM exception vector
routines differ; hyp-stub uses a non-zero value in x0 to implement
__hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp.

To allow for additional hcalls to be defined and to make the arm64 hcall API
more consistent across exception vector routines, change the hcall
implementations to use the 16 bit immediate value of the HVC instruction to
specify the hcall type.

Define three new preprocessor macros HVC_CALL_HYP, HVC_GET_VECTORS, and
HVC_SET_VECTORS to be used as hcall type specifiers and convert the
existing __hyp_get_vectors(), __hyp_set_vectors() and kvm_call_hyp() routines
to use these new macros when executing an HVC call.  Also, change the
corresponding hyp-stub and KVM el1_sync exception vector routines to use these
new macros.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/virt.h | 27 +++++++++++++++++++++++++++
 arch/arm64/kernel/hyp-stub.S  | 32 +++++++++++++++++++++-----------
 arch/arm64/kvm/hyp.S          | 16 +++++++++-------
 3 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df52..eb10368 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -18,6 +18,33 @@
 #ifndef __ASM__VIRT_H
 #define __ASM__VIRT_H
 
+/*
+ * The arm64 hcall implementation uses the ISS field of the ESR_EL2 register to
+ * specify the hcall type.  The exception handlers are allowed to use registers
+ * x17 and x18 in their implementation.  Any routine issuing an hcall must not
+ * expect these registers to be preserved.
+ */
+
+/*
+ * HVC_CALL_HYP - Execute a hyp routine.
+ */
+
+#define HVC_CALL_HYP 0
+
+/*
+ * HVC_GET_VECTORS - Return the value of the vbar_el2 register.
+ */
+
+#define HVC_GET_VECTORS 1
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x0: Physical address of the new vector table.
+ */
+
+#define HVC_SET_VECTORS 2
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..017ab519 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -22,6 +22,7 @@
 #include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
+#include <asm/kvm_arm.h>
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
@@ -53,14 +54,22 @@ ENDPROC(__hyp_stub_vectors)
 	.align 11
 
 el1_sync:
-	mrs	x1, esr_el2
-	lsr	x1, x1, #26
-	cmp	x1, #0x16
+	mrs	x18, esr_el2
+	lsr	x17, x18, #ESR_ELx_EC_SHIFT
+	and	x18, x18, #ESR_ELx_ISS_MASK
+
+	cmp	x17, #ESR_ELx_EC_HVC64
 	b.ne	2f				// Not an HVC trap
-	cbz	x0, 1f
-	msr	vbar_el2, x0			// Set vbar_el2
+
+	cmp	x18, #HVC_GET_VECTORS
+	b.ne	1f
+	mrs	x0, vbar_el2
 	b	2f
-1:	mrs	x0, vbar_el2			// Return vbar_el2
+
+1:	cmp	x18, #HVC_SET_VECTORS
+	b.ne	2f
+	msr	vbar_el2, x0
+
 2:	eret
 ENDPROC(el1_sync)
 
@@ -100,11 +109,12 @@ ENDPROC(\label)
  * initialisation entry point.
  */
 
-ENTRY(__hyp_get_vectors)
-	mov	x0, xzr
-	// fall through
 ENTRY(__hyp_set_vectors)
-	hvc	#0
+	hvc	#HVC_SET_VECTORS
 	ret
-ENDPROC(__hyp_get_vectors)
 ENDPROC(__hyp_set_vectors)
+
+ENTRY(__hyp_get_vectors)
+	hvc	#HVC_GET_VECTORS
+	ret
+ENDPROC(__hyp_get_vectors)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 86c2898..15b1ef9 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -29,6 +29,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/memory.h>
+#include <asm/virt.h>
 
 #define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
@@ -936,12 +937,9 @@ __hyp_panic_str:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
- * A function pointer with a value of 0 has a special meaning, and is
- * used to implement __hyp_get_vectors in the same way as in
- * arch/arm64/kernel/hyp_stub.S.
  */
 ENTRY(kvm_call_hyp)
-	hvc	#0
+	hvc	#HVC_CALL_HYP
 	ret
 ENDPROC(kvm_call_hyp)
 
@@ -972,6 +970,7 @@ el1_sync:					// Guest trapped into EL2
 
 	mrs	x1, esr_el2
 	lsr	x2, x1, #ESR_ELx_EC_SHIFT
+	and	x0, x1, #ESR_ELx_ISS_MASK
 
 	cmp	x2, #ESR_ELx_EC_HVC64
 	b.ne	el1_trap
@@ -980,15 +979,18 @@ el1_sync:					// Guest trapped into EL2
 	cbnz	x3, el1_trap			// called HVC
 
 	/* Here, we're pretty sure the host called HVC. */
+	mov	x18, x0
 	pop	x2, x3
 	pop	x0, x1
 
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
+	cmp	x18, #HVC_GET_VECTORS
+	b.ne	1f
 	mrs	x0, vbar_el2
 	b	2f
 
-1:	push	lr, xzr
+1:	/* Default to HVC_CALL_HYP. */
+
+	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 02/19] arm64: kernel: Include _AC definition in page.h
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: James Morse <james.morse@arm.com>

page.h uses '_AC' in the definition of PAGE_SIZE, but doesn't include
linux/const.h where this is defined. This produces build warnings when only
asm/page.h is included by asm code.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/include/asm/page.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 9b2f5a9..fbafd0a 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,6 +19,8 @@
 #ifndef __ASM_PAGE_H
 #define __ASM_PAGE_H
 
+#include <linux/const.h>
+
 /* PAGE_SHIFT determines the page size */
 /* CONT_SHIFT determines the number of pages which can be tracked together  */
 #ifdef CONFIG_ARM64_64K_PAGES
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 16/19] arm64: kdump: add kdump support
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

On the crash dump kernel, all the information about the primary kernel's core
image is available in the ELF core header specified by the "elfcorehdr=" boot
parameter. reserve_elfcorehdr() will set aside that region to avoid any
corruption by the crash dump kernel.

The crash dump kernel will access the system memory of the primary kernel via
copy_oldmem_page(), which reads one page by ioremap'ing it, since that memory
does not reside in the linear mapping of the crash dump kernel.
Please note that we should add "mem=X[MG]" boot parameter to limit the
memory size and avoid the following assertion at ioremap():
	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
		return NULL;
when accessing any pages beyond the usable memories of crash dump kernel.

We also need our own elfcorehdr_read() here since the weak definition of
elfcorehdr_read() utilizes copy_oldmem_page() and will hit the assertion
above on arm64.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/Kconfig             | 11 +++++++
 arch/arm64/kernel/Makefile     |  1 +
 arch/arm64/kernel/crash_dump.c | 71 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/init.c           | 32 +++++++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 arch/arm64/kernel/crash_dump.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b7d0078..31901d0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -566,6 +566,17 @@ config KEXEC
 	  but it is independent of the system firmware.   And like a reboot
 	  you can start any kernel with it, not just Linux.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate crash dump after being started by kexec. This should
+	  be normally only set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/kdump/kdump.txt
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index f68420d..a08b054 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -43,6 +43,7 @@ arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
+arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index 0000000..2dc54d1
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies @csize bytes of a page of old kernel memory into the
+ * buffer pointed to by @buf. If @buf is in userspace, set @userbuf to %1.
+ * Returns the number of bytes copied, or a negative error code on failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = ioremap_cache(__pfn_to_phys(pfn), PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;
+
+	if (userbuf) {
+		if (copy_to_user(buf, vaddr + offset, csize)) {
+			iounmap(vaddr);
+			return -EFAULT;
+		}
+	} else {
+		memcpy(buf, vaddr + offset, csize);
+	}
+
+	iounmap(vaddr);
+
+	return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @count: number of bytes to read
+ * @ppos: address in the memory
+ *
+ * This function reads @count bytes from elf core header which exists
+ * on crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+	return count;
+}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a8eae6b..7398a6b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,6 +35,7 @@
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
 #include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
 #include <asm/fixmap.h>
 #include <asm/memory.h>
@@ -119,6 +120,36 @@ static void __init reserve_crashkernel(void)
 }
 #endif /* CONFIG_KEXEC_CORE */
 
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * reserve_elfcorehdr() - reserves memory for elf core header
+ *
+ * This function reserves elf core header given in "elfcorehdr=" kernel
+ * command line parameter. This region contains all the information about
+ * primary kernel's core image and is used by a dump capture kernel to
+ * access the system memory on primary kernel.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+	if (!elfcorehdr_size)
+		return;
+
+	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+		pr_warn("elfcorehdr is overlapped\n");
+		return;
+	}
+
+	memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+	pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
+		elfcorehdr_size >> 10, elfcorehdr_addr);
+}
+#else
+static void __init reserve_elfcorehdr(void)
+{
+	;
+}
+#endif /* CONFIG_CRASH_DUMP */
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -227,6 +258,7 @@ void __init arm64_memblock_init(void)
 #ifdef CONFIG_KEXEC_CORE
 	reserve_crashkernel();
 #endif
+	reserve_elfcorehdr();
 
 	early_init_fdt_scan_reserved_mem();
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

This patch adds arch specific descriptions about kdump usage on arm64
to kdump.txt.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index bc4bd5a..36cf978 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
 a remote system.
 
 Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x and arm architectures.
+s390x, arm and arm64 architectures.
 
 When the system kernel boots, it reserves a small section of memory for
 the dump-capture kernel. This ensures that ongoing Direct Memory Access
@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
 
     AUTO_ZRELADDR=y
 
+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+1) The maximum memory size on the dump-capture kernel must be limited by
+   specifying:
+
+   mem=X[MG]
+
+   where X should be less than or equal to the size in "crashkernel="
+   boot parameter. Kexec-tools will automatically add this.
+
+2) Currently, kvm will not be enabled on the dump-capture kernel even
+   if it is configured.
+
 Extended crashkernel syntax
 ===========================
 
@@ -312,6 +326,8 @@ Boot into System Kernel
    any space below the alignment point may be overwritten by the dump-capture kernel,
    which means it is possible that the vmcore is not that precise as expected.
 
+   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
+   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
 
 Load the Dump-capture Kernel
 ============================
@@ -334,6 +350,8 @@ For s390x:
 	- Use image or bzImage
 For arm:
 	- Use zImage
+For arm64:
+	- Use vmlinux or Image
 
 If you are using a uncompressed vmlinux image then use following command
 to load dump-capture kernel.
@@ -377,6 +395,9 @@ For s390x:
 For arm:
 	"1 maxcpus=1 reset_devices"
 
+For arm64:
+	"1 mem=X[MG] maxcpus=1 reset_devices"
+
 Notes on loading the dump-capture kernel:
 
 * By default, the ELF headers are stored in ELF64 format to support
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 14/19] arm64: kdump: reserve memory for crash dump kernel
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

On the primary kernel, the memory region used by the crash dump kernel must be
specified by the "crashkernel=" boot parameter. reserve_crashkernel()
will allocate and reserve the region for later use.

User space tools will be able to find the region marked as "Crash kernel"
in /proc/iomem.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Pratyush Anand <panand@redhat.com>
---
 arch/arm64/kernel/setup.c |  7 +++++-
 arch/arm64/mm/init.c      | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479..293cee2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
-#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -221,6 +220,12 @@ static void __init request_standard_resources(void)
 		    kernel_data.end <= res->end)
 			request_resource(res, &kernel_data);
 	}
+
+#ifdef CONFIG_KEXEC_CORE
+	/* User space tools will find "Crash kernel" region in /proc/iomem. */
+	if (crashk_res.end)
+		insert_resource(&iomem_resource, &crashk_res);
+#endif
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 17bf39a..a8eae6b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -34,6 +34,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
+#include <linux/kexec.h>
 
 #include <asm/fixmap.h>
 #include <asm/memory.h>
@@ -66,6 +67,58 @@ static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_size = 0, crash_base = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+				&crash_size, &crash_base);
+	if (ret)
+		return;
+
+	if (crash_base == 0) {
+		crash_base = memblock_find_in_range(0,
+				MEMBLOCK_ALLOC_ACCESSIBLE, crash_size, 1 << 21);
+		if (crash_base == 0) {
+			pr_warn("Unable to allocate crashkernel (size:%llx)\n",
+				crash_size);
+			return;
+		}
+		memblock_reserve(crash_base, crash_size);
+
+	} else {
+		/* User specifies base address explicitly. */
+		if (!memblock_is_region_memory(crash_base, crash_size) ||
+			memblock_is_region_reserved(crash_base, crash_size)) {
+			pr_warn("crashkernel has wrong address or size\n");
+			return;
+		}
+
+		if (crash_base & ((1 << 21) - 1)) {
+			pr_warn("crashkernel base address is not 2MB aligned\n");
+			return;
+		}
+
+		memblock_reserve(crash_base, crash_size);
+	}
+
+	pr_info("Reserving %lldMB of memory@%lldMB for crashkernel\n",
+		crash_size >> 20, crash_base >> 20);
+
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+}
+#endif /* CONFIG_KEXEC_CORE */
+
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -171,6 +224,10 @@ void __init arm64_memblock_init(void)
 		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+	reserve_crashkernel();
+#endif
+
 	early_init_fdt_scan_reserved_mem();
 
 	/* 4GB maximum for 32-bit only capable devices */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 17/19] arm64: kdump: enable kdump in the arm64 defconfig
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 0470fdf..6dc3d00 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -59,6 +59,7 @@ CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 11/19] arm64/kexec: Add core kexec support
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
arm64 architecture that add support for the kexec re-boot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |  10 +++
 arch/arm64/include/asm/kexec.h      |  48 +++++++++++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/machine_kexec.c   | 158 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 131 ++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 350 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 871f217..b7d0078 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -556,6 +556,16 @@ config SECCOMP
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
+config KEXEC
+	depends on PM_SLEEP_SMP
+	select KEXEC_CORE
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..04744dc
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,48 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_KEXEC_H
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+#define KEXEC_ARCH KEXEC_ARCH_AARCH64
+
+#ifndef __ASSEMBLY__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f..f68420d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -41,6 +41,8 @@ arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
+					   cpu-reset.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..9379d31
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,158 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/highmem.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the arm64_relocate_new_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+
+static unsigned long kimage_start;
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	kimage_start = kimage->start;
+	return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list to PoC.
+ */
+static void kexec_list_flush(struct kimage *kimage)
+{
+	kimage_entry_t *entry;
+	unsigned int flag;
+
+	for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) {
+		void *addr = kmap(phys_to_page(*entry & PAGE_MASK));
+
+		flag = *entry & IND_FLAGS;
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (kimage_entry_t *)addr - 1;
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DESTINATION:
+			break;
+		case IND_SOURCE:
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DONE:
+			break;
+		default:
+			BUG();
+		}
+		kunmap(addr);
+	}
+}
+
+/**
+ * kexec_segment_flush - Helper to flush the kimage segments to PoC.
+ */
+static void kexec_segment_flush(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_devel("%s:\n", __func__);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_devel("  segment[%lu]: %016lx - %016lx, %lx bytes, %lu pages\n",
+			i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz,
+			kimage->segment[i].memsz,
+			kimage->segment[i].memsz /  PAGE_SIZE);
+
+		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
+			kimage->segment[i].memsz);
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *kimage)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	BUG_ON(num_online_cpus() > 1);
+
+	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
+	reboot_code_buffer = kmap(kimage->control_code_page);
+
+	/*
+	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+		arm64_relocate_new_kernel_size);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+	flush_icache_range((uintptr_t)reboot_code_buffer,
+		arm64_relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+	kexec_list_flush(kimage);
+
+	/* Flush the new image if already in place. */
+	if (kimage->head & IND_DONE)
+		kexec_segment_flush(kimage);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	setup_mm_for_reboot();
+
+	/*
+	 * cpu_soft_restart will shut down the MMU, disable data caches, then
+	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
+	 * uses physical addressing to relocate the new image to its final
+	 * position and transfers control to the image entry point when the
+	 * relocation is complete.
+	 */
+
+	cpu_soft_restart(is_hyp_mode_available(),
+		reboot_code_buffer_phys, kimage->head, kimage_start, 0);
+
+	BUG(); /* Should never get here. */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..e380db3
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,131 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location.  To assure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+.globl arm64_relocate_new_kernel
+arm64_relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+	mov	x18, x1				/* x18 = kimage_start */
+	mov	x17, x0				/* x17 = kimage_head */
+	dcache_line_size x16, x0		/* x16 = dcache line size */
+	mov	x15, xzr			/* x15 = segment start */
+	mov	x14, xzr			/* x14 = entry ptr */
+	mov	x13, xzr			/* x13 = copy dest */
+
+	/* Clear the sctlr_el2 flags. */
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1
+	msr	sctlr_el2, x0
+	isb
+1:
+
+	/* Check if the new image needs relocation. */
+	cbz	x17, .Ldone
+	tbnz	x17, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x12, x17, PAGE_MASK		/* x12 = addr */
+
+	/* Test the entry flags. */
+.Ltest_source:
+	tbz	x17, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* Invalidate dest page to PoC. */
+	mov     x0, x13
+	add     x20, x0, #PAGE_SIZE
+	sub     x1, x16, #1
+	bic     x0, x0, x1
+2:	dc      ivac, x0
+	add     x0, x0, x16
+	cmp     x0, x20
+	b.lo    2b
+	dsb     sy
+
+	mov x20, x13
+	mov x21, x12
+	copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
+
+	/* dest += PAGE_SIZE */
+	add	x13, x13, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x17, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+	mov	x14, x12
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x17, IND_DESTINATION_BIT, .Lnext
+
+	mov	x15, x12
+
+	/* dest = addr */
+	mov	x13, x12
+
+.Lnext:
+	/* entry = *ptr++ */
+	ldr	x17, [x14], #8
+
+	/* while (!(entry & DONE)) */
+	tbz	x17, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	dsb	nsh
+	ic	iallu
+	dsb	nsh
+	isb
+
+	/* Start new image. */
+	mov	x0, xzr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x18
+
+.ltorg
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+.Lcopy_end:
+.org	KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the
+ * control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+	.quad	.Lcopy_end - arm64_relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 99048e5..aae5ebf 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_AARCH64 (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 12/19] arm64/kexec: Enable kexec in the arm64 defconfig
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index bdd7aa3..0470fdf 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -58,6 +58,7 @@ CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_KEXEC=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 13/19] arm64/kexec: Add pr_debug output
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

To aid in debugging kexec problems, or when adding new functionality to kexec,
add a new routine kexec_image_info() and several inline pr_debug statements.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/machine_kexec.c | 70 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 9379d31..a375268 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -11,6 +11,7 @@
 
 #include <linux/highmem.h>
 #include <linux/kexec.h>
+#include <linux/libfdt_env.h>
 #include <linux/of_fdt.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -26,6 +27,50 @@ extern const unsigned long arm64_relocate_new_kernel_size;
 
 static unsigned long kimage_start;
 
+/**
+ * kexec_is_dtb - Helper routine to check the device tree header signature.
+ */
+static bool kexec_is_dtb(const void *dtb)
+{
+	__be32 magic;
+
+	if (get_user(magic, (__be32 *)dtb))
+		return false;
+
+	return fdt32_to_cpu(magic) == OF_DT_HEADER;
+}
+
+/**
+ * kexec_image_info - For debugging output.
+ */
+#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
+static void _kexec_image_info(const char *func, int line,
+	const struct kimage *kimage)
+{
+	unsigned long i;
+
+#ifndef DEBUG
+	return;
+#endif
+	pr_debug("%s:%d:\n", func, line);
+	pr_debug("  kexec kimage info:\n");
+	pr_debug("    type:        %d\n", kimage->type);
+	pr_debug("    start:       %lx\n", kimage->start);
+	pr_debug("    head:        %lx\n", kimage->head);
+	pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages%s\n",
+			i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz,
+			kimage->segment[i].memsz,
+			kimage->segment[i].memsz /  PAGE_SIZE,
+			(kexec_is_dtb(kimage->segment[i].buf) ?
+				", dtb segment" : ""));
+	}
+}
+
 void machine_kexec_cleanup(struct kimage *kimage)
 {
 	/* Empty routine needed to avoid build errors. */
@@ -39,6 +84,8 @@ void machine_kexec_cleanup(struct kimage *kimage)
 int machine_kexec_prepare(struct kimage *kimage)
 {
 	kimage_start = kimage->start;
+	kexec_image_info(kimage);
+
 	return 0;
 }
 
@@ -81,10 +128,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
 {
 	unsigned long i;
 
-	pr_devel("%s:\n", __func__);
+	pr_debug("%s:\n", __func__);
 
 	for (i = 0; i < kimage->nr_segments; i++) {
-		pr_devel("  segment[%lu]: %016lx - %016lx, %lx bytes, %lu pages\n",
+		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
 			i,
 			kimage->segment[i].mem,
 			kimage->segment[i].mem + kimage->segment[i].memsz,
@@ -111,6 +158,25 @@ void machine_kexec(struct kimage *kimage)
 	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
 	reboot_code_buffer = kmap(kimage->control_code_page);
 
+	kexec_image_info(kimage);
+
+	pr_debug("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
+		kimage->control_code_page);
+	pr_debug("%s:%d: reboot_code_buffer_phys:  %pa\n", __func__, __LINE__,
+		&reboot_code_buffer_phys);
+	pr_debug("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
+		reboot_code_buffer);
+	pr_debug("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
+		arm64_relocate_new_kernel);
+	pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
+		__func__, __LINE__, arm64_relocate_new_kernel_size,
+		arm64_relocate_new_kernel_size);
+
+	pr_debug("%s:%d: kimage_head:              %lx\n", __func__, __LINE__,
+		kimage->head);
+	pr_debug("%s:%d: kimage_start:             %lx\n", __func__, __LINE__,
+		kimage_start);
+
 	/*
 	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
 	 * after the kernel is shut down.
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

The current kvm implementation on arm64 does cpu-specific initialization
at system boot, and has no way to gracefully shut down a core in terms of
kvm. In particular, this prevents kexec from rebooting the system on a boot
core in EL2.

This patch adds a cpu tear-down function and also moves the existing cpu-init
code into a separate function: kvm_arch_hardware_disable() and
kvm_arch_hardware_enable() respectively.
We no longer need an arm64-specific cpu hotplug hook.

Since this patch modifies code shared between arm and arm64, a stub
definition of __cpu_reset_hyp_mode() is added on the arm side to avoid
compile errors.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   | 10 ++++-
 arch/arm/include/asm/kvm_mmu.h    |  1 +
 arch/arm/kvm/arm.c                | 93 +++++++++++++++++++++++----------------
 arch/arm/kvm/mmu.c                |  5 +++
 arch/arm64/include/asm/kvm_host.h | 11 ++++-
 arch/arm64/include/asm/kvm_mmu.h  |  1 +
 arch/arm64/include/asm/virt.h     |  9 ++++
 arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++
 arch/arm64/kvm/hyp.S              | 32 ++++++++++++--
 9 files changed, 151 insertions(+), 44 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6692982..9242765 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -214,6 +214,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * TODO
+	 * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+	 */
+}
+
 static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	return 0;
@@ -226,7 +235,6 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 405aa18..dc6fadf 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -66,6 +66,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e06fd29..e91f80e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,7 +16,6 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
@@ -61,6 +60,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -85,11 +86,6 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void)
-{
-	return 0;
-}
-
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -577,7 +573,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		/*
 		 * Re-check atomic conditions
 		 */
-		if (signal_pending(current)) {
+		if (unlikely(!__this_cpu_read(kvm_arm_hardware_enabled))) {
+			/* cpu has been torn down */
+			ret = 0;
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason
+					= (u64)-ENOEXEC;
+		} else if (signal_pending(current)) {
 			ret = -EINTR;
 			run->exit_reason = KVM_EXIT_INTR;
 		}
@@ -954,7 +956,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }
 
-static void cpu_init_hyp_mode(void *dummy)
+static void cpu_init_hyp_mode(void)
 {
 	phys_addr_t boot_pgd_ptr;
 	phys_addr_t pgd_ptr;
@@ -976,36 +978,56 @@ static void cpu_init_hyp_mode(void *dummy)
 	kvm_arm_init_debug();
 }
 
-static int hyp_init_cpu_notify(struct notifier_block *self,
-			       unsigned long action, void *cpu)
+static void cpu_reset_hyp_mode(void)
 {
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		if (__hyp_get_vectors() == hyp_default_vectors)
-			cpu_init_hyp_mode(NULL);
-		break;
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t phys_idmap_start;
+
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	phys_idmap_start = kvm_get_idmap_start();
+
+	__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
+}
+
+int kvm_arch_hardware_enable(void)
+{
+	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+		cpu_init_hyp_mode();
+		__this_cpu_write(kvm_arm_hardware_enabled, 1);
 	}
 
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block hyp_init_cpu_nb = {
-	.notifier_call = hyp_init_cpu_notify,
-};
+void kvm_arch_hardware_disable(void)
+{
+	if (!__this_cpu_read(kvm_arm_hardware_enabled))
+		return;
+
+	cpu_reset_hyp_mode();
+	__this_cpu_write(kvm_arm_hardware_enabled, 0);
+}
 
 #ifdef CONFIG_CPU_PM
 static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT &&
-	    __hyp_get_vectors() == hyp_default_vectors) {
-		cpu_init_hyp_mode(NULL);
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			cpu_reset_hyp_mode();
+
 		return NOTIFY_OK;
-	}
+	case CPU_PM_EXIT:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			cpu_init_hyp_mode();
 
-	return NOTIFY_DONE;
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
 }
 
 static struct notifier_block hyp_init_cpu_pm_nb = {
@@ -1103,14 +1125,20 @@ static int init_hyp_mode(void)
 	}
 
 	/*
-	 * Execute the init code on each CPU.
+	 * Init this CPU temporarily to execute kvm_call_hyp()
+	 * during kvm_vgic_hyp_init().
 	 */
-	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
+	preempt_disable();
+	cpu_init_hyp_mode();
 
 	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
+
+	cpu_reset_hyp_mode();
+	preempt_enable();
+
 	if (err)
 		goto out_free_context;
 
@@ -1181,26 +1209,15 @@ int kvm_arch_init(void *opaque)
 		}
 	}
 
-	cpu_notifier_register_begin();
-
 	err = init_hyp_mode();
 	if (err)
 		goto out_err;
 
-	err = __register_cpu_notifier(&hyp_init_cpu_nb);
-	if (err) {
-		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
-		goto out_err;
-	}
-
-	cpu_notifier_register_done();
-
 	hyp_cpu_pm_init();
 
 	kvm_coproc_table_init();
 	return 0;
 out_err:
-	cpu_notifier_register_done();
 	return err;
 }
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 61d96a6..e6651dd 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1643,6 +1643,11 @@ phys_addr_t kvm_get_idmap_vector(void)
 	return hyp_idmap_vector;
 }
 
+phys_addr_t kvm_get_idmap_start(void)
+{
+	return hyp_idmap_start;
+}
+
 int kvm_mmu_init(void)
 {
 	int err;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a35ce72..de08e4c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -223,6 +223,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void kvm_call_reset(phys_addr_t boot_pgd_ptr, phys_addr_t phys_idmap_start);
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
@@ -247,7 +248,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
-static inline void kvm_arch_hardware_disable(void) {}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * Call reset code, and switch back to stub hyp vectors.
+	 */
+	kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+}
+
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6150567..ff5a087 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -98,6 +98,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 3070096..bca79f9 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -58,9 +58,18 @@
 
 #define HVC_CALL_FUNC 3
 
+/*
+ * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
+ *
+ * @x0: HYP boot pgd (physical address)
+ * @x1: physical start address of the HYP idmap (phys_idmap_start)
+ */
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+#define HVC_RESET_CPU 4
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 1d7e502..d909ce2 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -140,6 +140,39 @@ merged:
 	eret
 ENDPROC(__kvm_hyp_init)
 
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP phys_idmap_start
+	 */
+ENTRY(__kvm_hyp_reset)
+	/* We're in trampoline code in VA, switch back to boot page tables */
+	msr	ttbr0_el2, x0
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Branch into PA space */
+	adr	x0, 1f
+	bfi	x1, x0, #0, #PAGE_SHIFT
+	br	x1
+
+	/* We're now in idmap, disable MMU */
+1:	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1		// Clear SCTL_M and etc
+	msr	sctlr_el2, x0
+	isb
+
+	/* Install stub vectors */
+	adrp	x0, __hyp_stub_vectors
+	add	x0, x0, #:lo12:__hyp_stub_vectors
+	msr	vbar_el2, x0
+
+	eret
+ENDPROC(__kvm_hyp_reset)
+
 	.ltorg
 
 	.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 15b1ef9..ed82dc2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -943,6 +943,11 @@ ENTRY(kvm_call_hyp)
 	ret
 ENDPROC(kvm_call_hyp)
 
+ENTRY(kvm_call_reset)
+	hvc	#HVC_RESET_CPU
+	ret
+ENDPROC(kvm_call_reset)
+
 .macro invalid_vector	label, target
 	.align	2
 \label:
@@ -986,10 +991,27 @@ el1_sync:					// Guest trapped into EL2
 	cmp	x18, #HVC_GET_VECTORS
 	b.ne	1f
 	mrs	x0, vbar_el2
-	b	2f
-
-1:	/* Default to HVC_CALL_HYP. */
+	b	do_eret
 
+	/* jump into trampoline code */
+1:	cmp	x18, #HVC_RESET_CPU
+	b.ne	2f
+	/*
+	 * Entry point is:
+	 *	TRAMPOLINE_VA
+	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
+	 */
+	adrp	x2, __kvm_hyp_reset
+	add	x2, x2, #:lo12:__kvm_hyp_reset
+	adrp	x3, __hyp_idmap_text_start
+	add	x3, x3, #:lo12:__hyp_idmap_text_start
+	and	x3, x3, PAGE_MASK
+	sub	x2, x2, x3
+	ldr	x3, =TRAMPOLINE_VA
+	add	x2, x2, x3
+	br	x2				// no return
+
+2:	/* Default to HVC_CALL_HYP. */
 	push	lr, xzr
 
 	/*
@@ -1003,7 +1025,9 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
-2:	eret
+
+do_eret:
+	eret
 
 el1_trap:
 	/*
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 19/19] arm64: kdump: relax BUG_ON() if more than one cpus are still active
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

In the kdump case we should make a best effort to capture a dump.
So even if not all secondary cpus have shut down, warn and do kdump anyway.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/kernel/machine_kexec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 0ba2ae4..3230551 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -157,7 +157,7 @@ void machine_kexec(struct kimage *kimage)
 	phys_addr_t reboot_code_buffer_phys;
 	void *reboot_code_buffer;
 
-	BUG_ON(num_online_cpus() > 1);
+	BUG_ON((num_online_cpus() > 1) && !WARN_ON(in_crash_kexec));
 
 	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
 	reboot_code_buffer = kmap(kimage->control_code_page);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 15/19] arm64: kdump: implement machine_crash_shutdown()
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-15 19:18   ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

kdump calls machine_crash_shutdown() to shut down the non-crashing cpus and
save the register state in per-cpu ELF notes before starting the crash
dump kernel. See kernel_kexec().

ipi_cpu_stop() is slightly modified to support this behavior.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/asm/kexec.h    | 47 +++++++++++++++++++++++++++-
 arch/arm64/kernel/machine_kexec.c | 65 +++++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/smp.c           | 18 ++++++++---
 3 files changed, 123 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 04744dc..3dffb1d 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -30,6 +30,21 @@
 
 #ifndef __ASSEMBLY__
 
+extern bool in_crash_kexec;
+
+static inline bool is_in_crash_kexec(void)
+{
+#ifdef CONFIG_KEXEC_CORE
+	return in_crash_kexec;
+#else
+	return false;
+#endif
+}
+
+#ifndef CONFIG_KEXEC_CORE
+#define crash_save_cpu(regs, cpu)
+#endif
+
 /**
  * crash_setup_regs() - save registers for the panic kernel
  *
@@ -40,7 +55,37 @@
 static inline void crash_setup_regs(struct pt_regs *newregs,
 				    struct pt_regs *oldregs)
 {
-	/* Empty routine needed to avoid build errors. */
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else {
+		__asm__ __volatile__ (
+			"stp	 x0,   x1, [%3, #16 *  0]\n"
+			"stp	 x2,   x3, [%3, #16 *  1]\n"
+			"stp	 x4,   x5, [%3, #16 *  2]\n"
+			"stp	 x6,   x7, [%3, #16 *  3]\n"
+			"stp	 x8,   x9, [%3, #16 *  4]\n"
+			"stp	x10,  x11, [%3, #16 *  5]\n"
+			"stp	x12,  x13, [%3, #16 *  6]\n"
+			"stp	x14,  x15, [%3, #16 *  7]\n"
+			"stp	x16,  x17, [%3, #16 *  8]\n"
+			"stp	x18,  x19, [%3, #16 *  9]\n"
+			"stp	x20,  x21, [%3, #16 * 10]\n"
+			"stp	x22,  x23, [%3, #16 * 11]\n"
+			"stp	x24,  x25, [%3, #16 * 12]\n"
+			"stp	x26,  x27, [%3, #16 * 13]\n"
+			"stp	x28,  x29, [%3, #16 * 14]\n"
+			"str	x30,	   [%3, #16 * 15]\n"
+			"mov	%0, sp\n"
+			"adr	%1, 1f\n"
+			"mrs	%2, spsr_el1\n"
+		"1:"
+			: "=r" (newregs->sp),
+			  "=r" (newregs->pc),
+			  "=r" (newregs->pstate)
+			: "r"  (&newregs->regs)
+			: "memory"
+		);
+	}
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a375268..0ba2ae4 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -10,6 +10,9 @@
  */
 
 #include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/kexec.h>
 #include <linux/libfdt_env.h>
 #include <linux/of_fdt.h>
@@ -25,6 +28,7 @@
 extern const unsigned char arm64_relocate_new_kernel[];
 extern const unsigned long arm64_relocate_new_kernel_size;
 
+bool in_crash_kexec;
 static unsigned long kimage_start;
 
 /**
@@ -212,13 +216,70 @@ void machine_kexec(struct kimage *kimage)
 	 * relocation is complete.
 	 */
 
-	cpu_soft_restart(is_hyp_mode_available(),
+	cpu_soft_restart(in_crash_kexec ? 0 : is_hyp_mode_available(),
 		reboot_code_buffer_phys, kimage->head, kimage_start, 0);
 
 	BUG(); /* Should never get here. */
 }
 
+static void machine_kexec_mask_interrupts(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+		int ret;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		/*
+		 * First try to remove the active state. If this
+		 * fails, try to EOI the interrupt.
+		 */
+		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+		    chip->irq_eoi)
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-crashing cpus and save registers
+ */
 void machine_crash_shutdown(struct pt_regs *regs)
 {
-	/* Empty routine needed to avoid build errors. */
+	struct pt_regs dummy_regs;
+	int cpu;
+
+	local_irq_disable();
+
+	in_crash_kexec = true;
+
+	/*
+	 * clear and initialize the per-cpu info. This is necessary
+	 * because, otherwise, slots for offline cpus would never be
+	 * filled up. See smp_send_stop().
+	 */
+	memset(&dummy_regs, 0, sizeof(dummy_regs));
+	for_each_possible_cpu(cpu)
+		crash_save_cpu(&dummy_regs, cpu);
+
+	/* shutdown non-crashing cpus */
+	smp_send_stop();
+
+	/* for crashing cpu */
+	crash_save_cpu(regs, smp_processor_id());
+	machine_kexec_mask_interrupts();
+
+	pr_info("Starting crashdump kernel...\n");
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51..aa45c21 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@
 #include <linux/completion.h>
 #include <linux/of.h>
 #include <linux/irq_work.h>
+#include <linux/kexec.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -44,6 +45,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
+#include <asm/kexec.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -54,6 +56,8 @@
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
+#include "cpu-reset.h"
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/ipi.h>
 
@@ -683,10 +687,16 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
 {
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
+	if (is_in_crash_kexec()) {
+		crash_save_cpu(regs, cpu);
+		/*
+		 * Printing messages at panic may slow down the shutdown,
+		 * so skip the pr_crit()/dump_stack() path below.
+		 */
+	} else if (system_state == SYSTEM_BOOTING ||
+			system_state == SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
@@ -727,7 +737,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 
 	case IPI_CPU_STOP:
 		irq_enter();
-		ipi_cpu_stop(cpu);
+		ipi_cpu_stop(cpu, regs);
 		irq_exit();
 		break;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

This patch adds arch specific descriptions about kdump usage on arm64
to kdump.txt.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index bc4bd5a..36cf978 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
 a remote system.
 
 Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x and arm architectures.
+s390x, arm and arm64 architectures.
 
 When the system kernel boots, it reserves a small section of memory for
 the dump-capture kernel. This ensures that ongoing Direct Memory Access
@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
 
     AUTO_ZRELADDR=y
 
+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+1) The maximum memory size on the dump-capture kernel must be limited by
+   specifying:
+
+   mem=X[MG]
+
+   where X should be less than or equal to the size in "crashkernel="
+   boot parameter. Kexec-tools will automatically add this.
+
+2) Currently, kvm will not be enabled on the dump-capture kernel even
+   if it is configured.
+
 Extended crashkernel syntax
 ===========================
 
@@ -312,6 +326,8 @@ Boot into System Kernel
    any space below the alignment point may be overwritten by the dump-capture kernel,
    which means it is possible that the vmcore is not that precise as expected.
 
+   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
+   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
 
 Load the Dump-capture Kernel
 ============================
@@ -334,6 +350,8 @@ For s390x:
 	- Use image or bzImage
 For arm:
 	- Use zImage
+For arm64:
+	- Use vmlinux or Image
 
 If you are using a uncompressed vmlinux image then use following command
 to load dump-capture kernel.
@@ -377,6 +395,9 @@ For s390x:
 For arm:
 	"1 maxcpus=1 reset_devices"
 
+For arm64:
+	"1 mem=X[MG] maxcpus=1 reset_devices"
+
 Notes on loading the dump-capture kernel:
 
 * By default, the ELF headers are stored in ELF64 format to support
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 16/19] arm64: kdump: add kdump support
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

On the crash dump kernel, all the information about the primary kernel's core
image is available in the ELF core header specified by the "elfcorehdr=" boot
parameter. reserve_elfcorehdr() will set aside the region to avoid any
corruption by the crash dump kernel.

The crash dump kernel accesses the system memory of the primary kernel via
copy_oldmem_page(), which reads one page by ioremap'ing it, since that
memory does not reside in the linear mapping of the crash dump kernel.
Please note that the "mem=X[MG]" boot parameter must be added to limit the
memory size and avoid hitting the following assertion in ioremap():
	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
		return NULL;
when accessing any pages beyond the usable memory of the crash dump kernel.

We also need our own elfcorehdr_read() here since the weak definition of
elfcorehdr_read() utilizes copy_oldmem_page() and will hit the assertion
above on arm64.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/Kconfig             | 11 +++++++
 arch/arm64/kernel/Makefile     |  1 +
 arch/arm64/kernel/crash_dump.c | 71 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/init.c           | 32 +++++++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 arch/arm64/kernel/crash_dump.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b7d0078..31901d0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -566,6 +566,17 @@ config KEXEC
 	  but it is independent of the system firmware.   And like a reboot
 	  you can start any kernel with it, not just Linux.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate crash dump after being started by kexec. This should
+	  be normally only set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/kdump/kdump.txt
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index f68420d..a08b054 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -43,6 +43,7 @@ arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
+arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index 0000000..2dc54d1
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies @csize bytes of one page of old kernel memory, starting
+ * at @offset, into the buffer pointed to by @buf. If @buf is in userspace, set
+ * @userbuf to %1. Returns number of bytes copied or negative error on failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = ioremap_cache(__pfn_to_phys(pfn), PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;
+
+	if (userbuf) {
+		if (copy_to_user(buf, vaddr + offset, csize)) {
+			iounmap(vaddr);
+			return -EFAULT;
+		}
+	} else {
+		memcpy(buf, vaddr + offset, csize);
+	}
+
+	iounmap(vaddr);
+
+	return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @count: number of bytes to read
+ * @ppos: address in the memory
+ *
+ * This function reads @count bytes from elf core header which exists
+ * on crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+	return count;
+}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a8eae6b..7398a6b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,6 +35,7 @@
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
 #include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
 #include <asm/fixmap.h>
 #include <asm/memory.h>
@@ -119,6 +120,36 @@ static void __init reserve_crashkernel(void)
 }
 #endif /* CONFIG_KEXEC_CORE */
 
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * reserve_elfcorehdr() - reserves memory for elf core header
+ *
+ * This function reserves elf core header given in "elfcorehdr=" kernel
+ * command line parameter. This region contains all the information about
+ * primary kernel's core image and is used by a dump capture kernel to
+ * access the system memory on primary kernel.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+	if (!elfcorehdr_size)
+		return;
+
+	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+		pr_warn("elfcorehdr is overlapped\n");
+		return;
+	}
+
+	memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+	pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
+		elfcorehdr_size >> 10, elfcorehdr_addr);
+}
+#else
+static void __init reserve_elfcorehdr(void)
+{
+	;
+}
+#endif /* CONFIG_CRASH_DUMP */
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -227,6 +258,7 @@ void __init arm64_memblock_init(void)
 #ifdef CONFIG_KEXEC_CORE
 	reserve_crashkernel();
 #endif
+	reserve_elfcorehdr();
 
 	early_init_fdt_scan_reserved_mem();
 
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 17/19] arm64: kdump: enable kdump in the arm64 defconfig
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 0470fdf..6dc3d00 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -59,6 +59,7 @@ CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 14/19] arm64: kdump: reserve memory for crash dump kernel
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

On the primary kernel, the memory region used by the crash dump kernel must be
specified by the "crashkernel=" boot parameter. reserve_crashkernel()
will allocate and reserve the region for later use.

User space tools will be able to find the region marked as "Crash kernel"
in /proc/iomem.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Pratyush Anand <panand@redhat.com>
---
 arch/arm64/kernel/setup.c |  7 +++++-
 arch/arm64/mm/init.c      | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479..293cee2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
-#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -221,6 +220,12 @@ static void __init request_standard_resources(void)
 		    kernel_data.end <= res->end)
 			request_resource(res, &kernel_data);
 	}
+
+#ifdef CONFIG_KEXEC_CORE
+	/* User space tools will find "Crash kernel" region in /proc/iomem. */
+	if (crashk_res.end)
+		insert_resource(&iomem_resource, &crashk_res);
+#endif
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 17bf39a..a8eae6b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -34,6 +34,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
+#include <linux/kexec.h>
 
 #include <asm/fixmap.h>
 #include <asm/memory.h>
@@ -66,6 +67,58 @@ static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_size = 0, crash_base = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+				&crash_size, &crash_base);
+	if (ret)
+		return;
+
+	if (crash_base == 0) {
+		crash_base = memblock_find_in_range(0,
+				MEMBLOCK_ALLOC_ACCESSIBLE, crash_size, 1 << 21);
+		if (crash_base == 0) {
+			pr_warn("Unable to allocate crashkernel (size:%llx)\n",
+				crash_size);
+			return;
+		}
+		memblock_reserve(crash_base, crash_size);
+
+	} else {
+		/* User specifies base address explicitly. */
+		if (!memblock_is_region_memory(crash_base, crash_size) ||
+			memblock_is_region_reserved(crash_base, crash_size)) {
+			pr_warn("crashkernel has wrong address or size\n");
+			return;
+		}
+
+		if (crash_base & ((1 << 21) - 1)) {
+			pr_warn("crashkernel base address is not 2MB aligned\n");
+			return;
+		}
+
+		memblock_reserve(crash_base, crash_size);
+	}
+
+	pr_info("Reserving %lldMB of memory at %lldMB for crashkernel\n",
+		crash_size >> 20, crash_base >> 20);
+
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+}
+#endif /* CONFIG_KEXEC_CORE */
+
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -171,6 +224,10 @@ void __init arm64_memblock_init(void)
 		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+	reserve_crashkernel();
+#endif
+
 	early_init_fdt_scan_reserved_mem();
 
 	/* 4GB maximum for 32-bit only capable devices */
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 11/19] arm64/kexec: Add core kexec support
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
arm64 architecture that add support for the kexec re-boot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |  10 +++
 arch/arm64/include/asm/kexec.h      |  48 +++++++++++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/machine_kexec.c   | 158 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 131 ++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 350 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 871f217..b7d0078 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -556,6 +556,16 @@ config SECCOMP
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
+config KEXEC
+	depends on PM_SLEEP_SMP
+	select KEXEC_CORE
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..04744dc
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,48 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_KEXEC_H
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+#define KEXEC_ARCH KEXEC_ARCH_AARCH64
+
+#ifndef __ASSEMBLY__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f..f68420d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -41,6 +41,8 @@ arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
+					   cpu-reset.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..9379d31
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,158 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/highmem.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the arm64_relocate_new_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+
+static unsigned long kimage_start;
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	kimage_start = kimage->start;
+	return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list to PoC.
+ */
+static void kexec_list_flush(struct kimage *kimage)
+{
+	kimage_entry_t *entry;
+	unsigned int flag;
+
+	for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) {
+		void *addr = kmap(phys_to_page(*entry & PAGE_MASK));
+
+		flag = *entry & IND_FLAGS;
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (kimage_entry_t *)addr - 1;
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DESTINATION:
+			break;
+		case IND_SOURCE:
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DONE:
+			break;
+		default:
+			BUG();
+		}
+		kunmap(addr);
+	}
+}
+
+/**
+ * kexec_segment_flush - Helper to flush the kimage segments to PoC.
+ */
+static void kexec_segment_flush(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_devel("%s:\n", __func__);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_devel("  segment[%lu]: %016lx - %016lx, %lx bytes, %lu pages\n",
+			i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz,
+			kimage->segment[i].memsz,
+			kimage->segment[i].memsz /  PAGE_SIZE);
+
+		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
+			kimage->segment[i].memsz);
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *kimage)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	BUG_ON(num_online_cpus() > 1);
+
+	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
+	reboot_code_buffer = kmap(kimage->control_code_page);
+
+	/*
+	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+		arm64_relocate_new_kernel_size);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+	flush_icache_range((uintptr_t)reboot_code_buffer,
+		arm64_relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+	kexec_list_flush(kimage);
+
+	/* Flush the new image if already in place. */
+	if (kimage->head & IND_DONE)
+		kexec_segment_flush(kimage);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	setup_mm_for_reboot();
+
+	/*
+	 * cpu_soft_restart will shut down the MMU, disable data caches, then
+	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
+	 * uses physical addressing to relocate the new image to its final
+	 * position and transfers control to the image entry point when the
+	 * relocation is complete.
+	 */
+
+	cpu_soft_restart(is_hyp_mode_available(),
+		reboot_code_buffer_phys, kimage->head, kimage_start, 0);
+
+	BUG(); /* Should never get here. */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..e380db3
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,131 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location.  To assure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+.globl arm64_relocate_new_kernel
+arm64_relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+	mov	x18, x1				/* x18 = kimage_start */
+	mov	x17, x0				/* x17 = kimage_head */
+	dcache_line_size x16, x0		/* x16 = dcache line size */
+	mov	x15, xzr			/* x15 = segment start */
+	mov	x14, xzr			/* x14 = entry ptr */
+	mov	x13, xzr			/* x13 = copy dest */
+
+	/* Clear the sctlr_el2 flags. */
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1
+	msr	sctlr_el2, x0
+	isb
+1:
+
+	/* Check if the new image needs relocation. */
+	cbz	x17, .Ldone
+	tbnz	x17, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x12, x17, PAGE_MASK		/* x12 = addr */
+
+	/* Test the entry flags. */
+.Ltest_source:
+	tbz	x17, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* Invalidate dest page to PoC. */
+	mov     x0, x13
+	add     x20, x0, #PAGE_SIZE
+	sub     x1, x16, #1
+	bic     x0, x0, x1
+2:	dc      ivac, x0
+	add     x0, x0, x16
+	cmp     x0, x20
+	b.lo    2b
+	dsb     sy
+
+	mov x20, x13
+	mov x21, x12
+	copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
+
+	/* dest += PAGE_SIZE */
+	add	x13, x13, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x17, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+	mov	x14, x12
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x17, IND_DESTINATION_BIT, .Lnext
+
+	mov	x15, x12
+
+	/* dest = addr */
+	mov	x13, x12
+
+.Lnext:
+	/* entry = *ptr++ */
+	ldr	x17, [x14], #8
+
+	/* while (!(entry & DONE)) */
+	tbz	x17, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	dsb	nsh
+	ic	iallu
+	dsb	nsh
+	isb
+
+	/* Start new image. */
+	mov	x0, xzr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x18
+
+.ltorg
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+.Lcopy_end:
+.org	KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the
+ * control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+	.quad	.Lcopy_end - arm64_relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 99048e5..aae5ebf 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_AARCH64 (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

The current kvm implementation on arm64 does cpu-specific initialization
at system boot, and has no way to gracefully shut down a core in terms of
kvm. In particular, this prevents kexec from rebooting the system on a boot
core in EL2.

This patch adds a cpu tear-down function and also moves the existing cpu-init
code into a separate function: kvm_arch_hardware_disable() and
kvm_arch_hardware_enable() respectively.
We don't need the arm64-specific cpu hotplug hook any more.

Since this patch modifies code shared between arm and arm64, one
stub definition, __cpu_reset_hyp_mode(), is added on the arm side to avoid
compile errors.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   | 10 ++++-
 arch/arm/include/asm/kvm_mmu.h    |  1 +
 arch/arm/kvm/arm.c                | 93 +++++++++++++++++++++++----------------
 arch/arm/kvm/mmu.c                |  5 +++
 arch/arm64/include/asm/kvm_host.h | 11 ++++-
 arch/arm64/include/asm/kvm_mmu.h  |  1 +
 arch/arm64/include/asm/virt.h     |  9 ++++
 arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++
 arch/arm64/kvm/hyp.S              | 32 ++++++++++++--
 9 files changed, 151 insertions(+), 44 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6692982..9242765 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -214,6 +214,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * TODO
+	 * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+	 */
+}
+
 static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	return 0;
@@ -226,7 +235,6 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 405aa18..dc6fadf 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -66,6 +66,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e06fd29..e91f80e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,7 +16,6 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
@@ -61,6 +60,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -85,11 +86,6 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void)
-{
-	return 0;
-}
-
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -577,7 +573,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		/*
 		 * Re-check atomic conditions
 		 */
-		if (signal_pending(current)) {
+		if (unlikely(!__this_cpu_read(kvm_arm_hardware_enabled))) {
+			/* cpu has been torn down */
+			ret = 0;
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason
+					= (u64)-ENOEXEC;
+		} else if (signal_pending(current)) {
 			ret = -EINTR;
 			run->exit_reason = KVM_EXIT_INTR;
 		}
@@ -954,7 +956,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }
 
-static void cpu_init_hyp_mode(void *dummy)
+static void cpu_init_hyp_mode(void)
 {
 	phys_addr_t boot_pgd_ptr;
 	phys_addr_t pgd_ptr;
@@ -976,36 +978,56 @@ static void cpu_init_hyp_mode(void *dummy)
 	kvm_arm_init_debug();
 }
 
-static int hyp_init_cpu_notify(struct notifier_block *self,
-			       unsigned long action, void *cpu)
+static void cpu_reset_hyp_mode(void)
 {
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		if (__hyp_get_vectors() == hyp_default_vectors)
-			cpu_init_hyp_mode(NULL);
-		break;
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t phys_idmap_start;
+
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	phys_idmap_start = kvm_get_idmap_start();
+
+	__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
+}
+
+int kvm_arch_hardware_enable(void)
+{
+	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+		cpu_init_hyp_mode();
+		__this_cpu_write(kvm_arm_hardware_enabled, 1);
 	}
 
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block hyp_init_cpu_nb = {
-	.notifier_call = hyp_init_cpu_notify,
-};
+void kvm_arch_hardware_disable(void)
+{
+	if (!__this_cpu_read(kvm_arm_hardware_enabled))
+		return;
+
+	cpu_reset_hyp_mode();
+	__this_cpu_write(kvm_arm_hardware_enabled, 0);
+}
 
 #ifdef CONFIG_CPU_PM
 static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT &&
-	    __hyp_get_vectors() == hyp_default_vectors) {
-		cpu_init_hyp_mode(NULL);
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			cpu_reset_hyp_mode();
+
 		return NOTIFY_OK;
-	}
+	case CPU_PM_EXIT:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			cpu_init_hyp_mode();
 
-	return NOTIFY_DONE;
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
 }
 
 static struct notifier_block hyp_init_cpu_pm_nb = {
@@ -1103,14 +1125,20 @@ static int init_hyp_mode(void)
 	}
 
 	/*
-	 * Execute the init code on each CPU.
+	 * Init this CPU temporarily to execute kvm_call_hyp()
+	 * during kvm_vgic_hyp_init().
 	 */
-	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
+	preempt_disable();
+	cpu_init_hyp_mode();
 
 	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
+
+	cpu_reset_hyp_mode();
+	preempt_enable();
+
 	if (err)
 		goto out_free_context;
 
@@ -1181,26 +1209,15 @@ int kvm_arch_init(void *opaque)
 		}
 	}
 
-	cpu_notifier_register_begin();
-
 	err = init_hyp_mode();
 	if (err)
 		goto out_err;
 
-	err = __register_cpu_notifier(&hyp_init_cpu_nb);
-	if (err) {
-		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
-		goto out_err;
-	}
-
-	cpu_notifier_register_done();
-
 	hyp_cpu_pm_init();
 
 	kvm_coproc_table_init();
 	return 0;
 out_err:
-	cpu_notifier_register_done();
 	return err;
 }
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 61d96a6..e6651dd 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1643,6 +1643,11 @@ phys_addr_t kvm_get_idmap_vector(void)
 	return hyp_idmap_vector;
 }
 
+phys_addr_t kvm_get_idmap_start(void)
+{
+	return hyp_idmap_start;
+}
+
 int kvm_mmu_init(void)
 {
 	int err;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a35ce72..de08e4c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -223,6 +223,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void kvm_call_reset(phys_addr_t boot_pgd_ptr, phys_addr_t phys_idmap_start);
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
@@ -247,7 +248,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
-static inline void kvm_arch_hardware_disable(void) {}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * Call reset code, and switch back to stub hyp vectors.
+	 */
+	kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+}
+
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6150567..ff5a087 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -98,6 +98,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 3070096..bca79f9 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -58,9 +58,18 @@
 
 #define HVC_CALL_FUNC 3
 
+/*
+ * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
+ *
+ * @x0: physical address of the HYP boot page table (boot pgd)
+ * @x1: physical address of the start of the HYP identity map (idmap)
+ */
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+#define HVC_RESET_CPU 4
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 1d7e502..d909ce2 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -140,6 +140,39 @@ merged:
 	eret
 ENDPROC(__kvm_hyp_init)
 
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP phys_idmap_start
+	 */
+ENTRY(__kvm_hyp_reset)
+	/* We're in trampoline code in VA, switch back to boot page tables */
+	msr	ttbr0_el2, x0
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Branch into PA space */
+	adr	x0, 1f
+	bfi	x1, x0, #0, #PAGE_SHIFT
+	br	x1
+
+	/* We're now in idmap, disable MMU */
+1:	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1		// Clear the SCTLR_ELx_FLAGS bits
+	msr	sctlr_el2, x0
+	isb
+
+	/* Install stub vectors */
+	adrp	x0, __hyp_stub_vectors
+	add	x0, x0, #:lo12:__hyp_stub_vectors
+	msr	vbar_el2, x0
+
+	eret
+ENDPROC(__kvm_hyp_reset)
+
 	.ltorg
 
 	.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 15b1ef9..ed82dc2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -943,6 +943,11 @@ ENTRY(kvm_call_hyp)
 	ret
 ENDPROC(kvm_call_hyp)
 
+ENTRY(kvm_call_reset)
+	hvc	#HVC_RESET_CPU
+	ret
+ENDPROC(kvm_call_reset)
+
 .macro invalid_vector	label, target
 	.align	2
 \label:
@@ -986,10 +991,27 @@ el1_sync:					// Guest trapped into EL2
 	cmp	x18, #HVC_GET_VECTORS
 	b.ne	1f
 	mrs	x0, vbar_el2
-	b	2f
-
-1:	/* Default to HVC_CALL_HYP. */
+	b	do_eret
 
+	/* jump into trampoline code */
+1:	cmp	x18, #HVC_RESET_CPU
+	b.ne	2f
+	/*
+	 * Entry point is:
+	 *	TRAMPOLINE_VA
+	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
+	 */
+	adrp	x2, __kvm_hyp_reset
+	add	x2, x2, #:lo12:__kvm_hyp_reset
+	adrp	x3, __hyp_idmap_text_start
+	add	x3, x3, #:lo12:__hyp_idmap_text_start
+	and	x3, x3, PAGE_MASK
+	sub	x2, x2, x3
+	ldr	x3, =TRAMPOLINE_VA
+	add	x2, x2, x3
+	br	x2				// no return
+
+2:	/* Default to HVC_CALL_HYP. */
 	push	lr, xzr
 
 	/*
@@ -1003,7 +1025,9 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
-2:	eret
+
+do_eret:
+	eret
 
 el1_trap:
 	/*
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 12/19] arm64/kexec: Enable kexec in the arm64 defconfig
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index bdd7aa3..0470fdf 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -58,6 +58,7 @@ CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_KEXEC=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 13/19] arm64/kexec: Add pr_debug output
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

To aid in debugging kexec problems or when adding new functionality to kexec,
add a new routine kexec_image_info() and several inline pr_debug statements.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/kernel/machine_kexec.c | 70 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 9379d31..a375268 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -11,6 +11,7 @@
 
 #include <linux/highmem.h>
 #include <linux/kexec.h>
+#include <linux/libfdt_env.h>
 #include <linux/of_fdt.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -26,6 +27,50 @@ extern const unsigned long arm64_relocate_new_kernel_size;
 
 static unsigned long kimage_start;
 
+/**
+ * kexec_is_dtb - Helper routine to check the device tree header signature.
+ */
+static bool kexec_is_dtb(const void *dtb)
+{
+	__be32 magic;
+
+	if (get_user(magic, (__be32 *)dtb))
+		return false;
+
+	return fdt32_to_cpu(magic) == OF_DT_HEADER;
+}
+
+/**
+ * kexec_image_info - For debugging output.
+ */
+#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
+static void _kexec_image_info(const char *func, int line,
+	const struct kimage *kimage)
+{
+	unsigned long i;
+
+#ifndef DEBUG
+	return;
+#endif
+	pr_debug("%s:%d:\n", func, line);
+	pr_debug("  kexec kimage info:\n");
+	pr_debug("    type:        %d\n", kimage->type);
+	pr_debug("    start:       %lx\n", kimage->start);
+	pr_debug("    head:        %lx\n", kimage->head);
+	pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages%s\n",
+			i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz,
+			kimage->segment[i].memsz,
+			kimage->segment[i].memsz /  PAGE_SIZE,
+			(kexec_is_dtb(kimage->segment[i].buf) ?
+				", dtb segment" : ""));
+	}
+}
+
 void machine_kexec_cleanup(struct kimage *kimage)
 {
 	/* Empty routine needed to avoid build errors. */
@@ -39,6 +84,8 @@ void machine_kexec_cleanup(struct kimage *kimage)
 int machine_kexec_prepare(struct kimage *kimage)
 {
 	kimage_start = kimage->start;
+	kexec_image_info(kimage);
+
 	return 0;
 }
 
@@ -81,10 +128,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
 {
 	unsigned long i;
 
-	pr_devel("%s:\n", __func__);
+	pr_debug("%s:\n", __func__);
 
 	for (i = 0; i < kimage->nr_segments; i++) {
-		pr_devel("  segment[%lu]: %016lx - %016lx, %lx bytes, %lu pages\n",
+		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
 			i,
 			kimage->segment[i].mem,
 			kimage->segment[i].mem + kimage->segment[i].memsz,
@@ -111,6 +158,25 @@ void machine_kexec(struct kimage *kimage)
 	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
 	reboot_code_buffer = kmap(kimage->control_code_page);
 
+	kexec_image_info(kimage);
+
+	pr_debug("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
+		kimage->control_code_page);
+	pr_debug("%s:%d: reboot_code_buffer_phys:  %pa\n", __func__, __LINE__,
+		&reboot_code_buffer_phys);
+	pr_debug("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
+		reboot_code_buffer);
+	pr_debug("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
+		arm64_relocate_new_kernel);
+	pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
+		__func__, __LINE__, arm64_relocate_new_kernel_size,
+		arm64_relocate_new_kernel_size);
+
+	pr_debug("%s:%d: kimage_head:              %lx\n", __func__, __LINE__,
+		kimage->head);
+	pr_debug("%s:%d: kimage_start:             %lx\n", __func__, __LINE__,
+		kimage_start);
+
 	/*
 	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
 	 * after the kernel is shut down.
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 19/19] arm64: kdump: relax BUG_ON() if more than one cpus are still active
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

We should try our best in the case of kdump.
So even if not all secondary cpus have shut down, we do kdump anyway.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/kernel/machine_kexec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 0ba2ae4..3230551 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -157,7 +157,7 @@ void machine_kexec(struct kimage *kimage)
 	phys_addr_t reboot_code_buffer_phys;
 	void *reboot_code_buffer;
 
-	BUG_ON(num_online_cpus() > 1);
+	BUG_ON((num_online_cpus() > 1) && !WARN_ON(in_crash_kexec));
 
 	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
 	reboot_code_buffer = kmap(kimage->control_code_page);
-- 
2.5.0


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 15/19] arm64: kdump: implement machine_crash_shutdown()
@ 2016-01-15 19:18   ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-15 19:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Mark Rutland, marc.zyngier, kexec, AKASHI Takahiro, James Morse,
	linux-arm-kernel, christoffer.dall

From: AKASHI Takahiro <takahiro.akashi@linaro.org>

kdump calls machine_crash_shutdown() to shut down non-boot cpus and
save registers' status in per-cpu ELF notes before starting the crash
dump kernel. See kernel_kexec().

ipi_cpu_stop() is slightly modified to support this behavior.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/asm/kexec.h    | 47 +++++++++++++++++++++++++++-
 arch/arm64/kernel/machine_kexec.c | 65 +++++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/smp.c           | 18 ++++++++---
 3 files changed, 123 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 04744dc..3dffb1d 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -30,6 +30,21 @@
 
 #ifndef __ASSEMBLY__
 
+extern bool in_crash_kexec;
+
+static inline bool is_in_crash_kexec(void)
+{
+#ifdef CONFIG_KEXEC_CORE
+	return in_crash_kexec;
+#else
+	return false;
+#endif
+}
+
+#ifndef CONFIG_KEXEC_CORE
+#define crash_save_cpu(regs, cpu)
+#endif
+
 /**
  * crash_setup_regs() - save registers for the panic kernel
  *
@@ -40,7 +55,37 @@
 static inline void crash_setup_regs(struct pt_regs *newregs,
 				    struct pt_regs *oldregs)
 {
-	/* Empty routine needed to avoid build errors. */
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else {
+		__asm__ __volatile__ (
+			"stp	 x0,   x1, [%3, #16 *  0]\n"
+			"stp	 x2,   x3, [%3, #16 *  1]\n"
+			"stp	 x4,   x5, [%3, #16 *  2]\n"
+			"stp	 x6,   x7, [%3, #16 *  3]\n"
+			"stp	 x8,   x9, [%3, #16 *  4]\n"
+			"stp	x10,  x11, [%3, #16 *  5]\n"
+			"stp	x12,  x13, [%3, #16 *  6]\n"
+			"stp	x14,  x15, [%3, #16 *  7]\n"
+			"stp	x16,  x17, [%3, #16 *  8]\n"
+			"stp	x18,  x19, [%3, #16 *  9]\n"
+			"stp	x20,  x21, [%3, #16 * 10]\n"
+			"stp	x22,  x23, [%3, #16 * 11]\n"
+			"stp	x24,  x25, [%3, #16 * 12]\n"
+			"stp	x26,  x27, [%3, #16 * 13]\n"
+			"stp	x28,  x29, [%3, #16 * 14]\n"
+			"str	x30,	   [%3, #16 * 15]\n"
+			"mov	%0, sp\n"
+			"adr	%1, 1f\n"
+			"mrs	%2, spsr_el1\n"
+		"1:"
+			: "=r" (newregs->sp),
+			  "=r" (newregs->pc),
+			  "=r" (newregs->pstate)
+			: "r"  (&newregs->regs)
+			: "memory"
+		);
+	}
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a375268..0ba2ae4 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -10,6 +10,9 @@
  */
 
 #include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/kexec.h>
 #include <linux/libfdt_env.h>
 #include <linux/of_fdt.h>
@@ -25,6 +28,7 @@
 extern const unsigned char arm64_relocate_new_kernel[];
 extern const unsigned long arm64_relocate_new_kernel_size;
 
+bool in_crash_kexec;
 static unsigned long kimage_start;
 
 /**
@@ -212,13 +216,70 @@ void machine_kexec(struct kimage *kimage)
 	 * relocation is complete.
 	 */
 
-	cpu_soft_restart(is_hyp_mode_available(),
+	cpu_soft_restart(in_crash_kexec ? 0 : is_hyp_mode_available(),
 		reboot_code_buffer_phys, kimage->head, kimage_start, 0);
 
 	BUG(); /* Should never get here. */
 }
 
+static void machine_kexec_mask_interrupts(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+		int ret;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		/*
+		 * First try to remove the active state. If this
+		 * fails, try to EOI the interrupt.
+		 */
+		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+		    chip->irq_eoi)
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-crashing cpus and save registers
+ */
 void machine_crash_shutdown(struct pt_regs *regs)
 {
-	/* Empty routine needed to avoid build errors. */
+	struct pt_regs dummy_regs;
+	int cpu;
+
+	local_irq_disable();
+
+	in_crash_kexec = true;
+
+	/*
+	 * clear and initialize the per-cpu info. This is necessary
+	 * because, otherwise, slots for offline cpus would never be
+	 * filled up. See smp_send_stop().
+	 */
+	memset(&dummy_regs, 0, sizeof(dummy_regs));
+	for_each_possible_cpu(cpu)
+		crash_save_cpu(&dummy_regs, cpu);
+
+	/* shutdown non-crashing cpus */
+	smp_send_stop();
+
+	/* for crashing cpu */
+	crash_save_cpu(regs, smp_processor_id());
+	machine_kexec_mask_interrupts();
+
+	pr_info("Starting crashdump kernel...\n");
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51..aa45c21 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@
 #include <linux/completion.h>
 #include <linux/of.h>
 #include <linux/irq_work.h>
+#include <linux/kexec.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -44,6 +45,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
+#include <asm/kexec.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -54,6 +56,8 @@
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
+#include "cpu-reset.h"
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/ipi.h>
 
@@ -683,10 +687,16 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
 {
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
+	if (is_in_crash_kexec()) {
+		crash_save_cpu(regs, cpu);
+		/*
+		 * printing messages at panic may slow down the shutdown.
+		 * So don't fall through dump_stack().
+		 */
+	} else if (system_state == SYSTEM_BOOTING ||
+			system_state == SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
@@ -727,7 +737,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 
 	case IPI_CPU_STOP:
 		irq_enter();
-		ipi_cpu_stop(cpu);
+		ipi_cpu_stop(cpu, regs);
 		irq_exit();
 		break;
 
-- 
2.5.0



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 174+ messages in thread

* [PATCH 09/19] Revert "arm64: remove dead code"
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-15 19:55     ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-15 19:55 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> This reverts commit b08d4640a3dca68670fc5af2fe9205b395a02388.
> 
> Add back the setup_mm_for_reboot() needed for kexec.

My pagetable rework series [1,2] adds cpu_install_idmap() [3], which
supersedes setup_mm_for_reboot, and differs only in name.

I intend to rebase once v4.5-rc1 comes out. Unless there are substantial
changes or new comments, I expect that to be on a stable branch shortly
thereafter. When that happens, it should be possible to drop this patch.

Thanks,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/397095.html
[2] https://git.kernel.org/cgit/linux/kernel/git/mark/linux.git/log/?h=arm64/pagetable-rework
[3] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/397101.html

> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/mmu.h |  1 +
>  arch/arm64/mm/mmu.c          | 11 +++++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> index 990124a..6326d11 100644
> --- a/arch/arm64/include/asm/mmu.h
> +++ b/arch/arm64/include/asm/mmu.h
> @@ -29,6 +29,7 @@ typedef struct {
>  #define ASID(mm)	((mm)->context.id.counter & 0xffff)
>  
>  extern void paging_init(void);
> +extern void setup_mm_for_reboot(void);
>  extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
>  extern void init_mem_pgprot(void);
>  extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 873e363..afcf1ee 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -466,6 +466,17 @@ void __init paging_init(void)
>  }
>  
>  /*
> + * Enable the identity mapping to allow the MMU disabling.
> + */
> +void setup_mm_for_reboot(void)
> +{
> +	cpu_set_reserved_ttbr0();
> +	flush_tlb_all();
> +	cpu_set_idmap_tcr_t0sz();
> +	cpu_switch_mm(idmap_pg_dir, &init_mm);
> +}
> +
> +/*
>   * Check whether a kernel address is valid (derived from arch/x86/).
>   */
>  int kern_addr_valid(unsigned long addr)
> -- 
> 2.5.0
> 
> 

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 09/19] Revert "arm64: remove dead code"
@ 2016-01-15 19:55     ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-15 19:55 UTC (permalink / raw)
  To: Geoff Levand
  Cc: marc.zyngier, Catalin Marinas, Will Deacon, AKASHI Takahiro,
	James Morse, linux-arm-kernel, kexec, christoffer.dall

Hi,

On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> This reverts commit b08d4640a3dca68670fc5af2fe9205b395a02388.
> 
> Add back the setup_mm_for_reboot() needed for kexec.

My pagetable rework series [1,2] adds cpu_install_idmap() [3], which
supersedes setup_mm_for_reboot, and differs only in name.

I intend to rebase once v4.5-rc1 comes out. Unless there are substantial
changes or new comments, I expect that to be on a stable branch shortly
thereafter. When that happens, it should be possible to drop this patch.

Thanks,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/397095.html
[2] https://git.kernel.org/cgit/linux/kernel/git/mark/linux.git/log/?h=arm64/pagetable-rework
[3] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/397101.html

> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/mmu.h |  1 +
>  arch/arm64/mm/mmu.c          | 11 +++++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> index 990124a..6326d11 100644
> --- a/arch/arm64/include/asm/mmu.h
> +++ b/arch/arm64/include/asm/mmu.h
> @@ -29,6 +29,7 @@ typedef struct {
>  #define ASID(mm)	((mm)->context.id.counter & 0xffff)
>  
>  extern void paging_init(void);
> +extern void setup_mm_for_reboot(void);
>  extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
>  extern void init_mem_pgprot(void);
>  extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 873e363..afcf1ee 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -466,6 +466,17 @@ void __init paging_init(void)
>  }
>  
>  /*
> + * Enable the identity mapping to allow the MMU disabling.
> + */
> +void setup_mm_for_reboot(void)
> +{
> +	cpu_set_reserved_ttbr0();
> +	flush_tlb_all();
> +	cpu_set_idmap_tcr_t0sz();
> +	cpu_switch_mm(idmap_pg_dir, &init_mm);
> +}
> +
> +/*
>   * Check whether a kernel address is valid (derived from arch/x86/).
>   */
>  int kern_addr_valid(unsigned long addr)
> -- 
> 2.5.0
> 
> 

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 04/19] arm64: Cleanup SCTLR flags
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-15 20:07     ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-15 20:07 UTC (permalink / raw)
  To: linux-arm-kernel

[Adding Marc as this touches KVM code]

On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> We currently have macros defining flags for the arm64 sctlr registers in both
> kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
> of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
> SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
> indicating a common flag, and fixup all files to include the proper header or
> to use the new macro names.

I am certainly in favour of having consistently named and located macros
for register fields.

> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/kvm_arm.h | 11 -----------
>  arch/arm64/include/asm/sysreg.h  | 19 +++++++++++++++----
>  arch/arm64/kvm/hyp-init.S        |  6 +++---
>  3 files changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 5e6857b..92ef6f6 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -83,17 +83,6 @@
>  #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
>  
>  
> -/* Hyp System Control Register (SCTLR_EL2) bits */
> -#define SCTLR_EL2_EE	(1 << 25)
> -#define SCTLR_EL2_WXN	(1 << 19)
> -#define SCTLR_EL2_I	(1 << 12)
> -#define SCTLR_EL2_SA	(1 << 3)
> -#define SCTLR_EL2_C	(1 << 2)
> -#define SCTLR_EL2_A	(1 << 1)
> -#define SCTLR_EL2_M	1
> -#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
> -			 SCTLR_EL2_SA | SCTLR_EL2_I)

SCTLR_EL2_FLAGS is a KVM-specific value (i.e. the SCTLR_EL2 flags which
KVM wants to set), even if it consists solely of common fields.

I believe it should stay here (with an include for <asm/sysreg.h>),
perhaps with a KVM_ prefix to imply it's not as generic as one might
assume it is.

> -
>  /* TCR_EL2 Registers bits */
>  #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
>  #define TCR_EL2_TBI	(1 << 20)
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index d48ab5b..109d46e 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -80,10 +80,21 @@
>  #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
>  				     (!!x)<<8 | 0x1f)
>  
> -/* SCTLR_EL1 */
> -#define SCTLR_EL1_CP15BEN	(0x1 << 5)
> -#define SCTLR_EL1_SED		(0x1 << 8)
> -#define SCTLR_EL1_SPAN		(0x1 << 23)
> +/* Common SCTLR_ELx flags. */
> +#define SCTLR_ELx_EE    (1 << 25)
> +#define SCTLR_ELx_I	(1 << 12)
> +#define SCTLR_ELx_SA	(1 << 3)
> +#define SCTLR_ELx_C	(1 << 2)
> +#define SCTLR_ELx_A	(1 << 1)
> +#define SCTLR_ELx_M	1

For consistency, (1 << 0) would be preferable.

> +
> +#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
> +			 SCTLR_ELx_SA | SCTLR_ELx_I)
> +
> +/* SCTLR_EL1 specific flags. */
> +#define SCTLR_EL1_SPAN		(1 << 23)
> +#define SCTLR_EL1_SED		(1 << 8)
> +#define SCTLR_EL1_CP15BEN	(1 << 5)
>  
>  
>  /* id_aa64isar0 */
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 178ba22..1d7e502 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -20,7 +20,7 @@
>  #include <asm/assembler.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> -#include <asm/pgtable-hwdef.h>
> +#include <asm/sysreg.h>
>  
>  	.text
>  	.pushsection	.hyp.idmap.text, "ax"
> @@ -105,8 +105,8 @@ __do_hyp_init:
>  	dsb	sy
>  
>  	mrs	x4, sctlr_el2
> -	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
> -	ldr	x5, =SCTLR_EL2_FLAGS
> +	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
> +	ldr	x5, =SCTLR_ELx_FLAGS

Marc, Christoffer, I note that in SCTLR_EL2_FLAGS we don't set the RES1
bits of SCTLR_EL2 (nor in head.S el2_setup). Should we perhaps be doing
so, so as to avoid any future surprises?

Thanks,
Mark.

>  	orr	x4, x4, x5
>  	msr	sctlr_el2, x4
>  	isb
> -- 
> 2.5.0
> 
> 

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 04/19] arm64: Cleanup SCTLR flags
@ 2016-01-15 20:07     ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-15 20:07 UTC (permalink / raw)
  To: Geoff Levand, marc.zyngier, christoffer.dall
  Cc: Catalin Marinas, Will Deacon, AKASHI Takahiro, James Morse,
	kexec, linux-arm-kernel

[Adding Marc as this touches KVM code]

On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> We currently have macros defining flags for the arm64 sctlr registers in both
> kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
> of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
> SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
> indicating a common flag, and fixup all files to include the proper header or
> to use the new macro names.

I am certainly in favour of having consistently named and located macros
for register fields.

> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/kvm_arm.h | 11 -----------
>  arch/arm64/include/asm/sysreg.h  | 19 +++++++++++++++----
>  arch/arm64/kvm/hyp-init.S        |  6 +++---
>  3 files changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 5e6857b..92ef6f6 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -83,17 +83,6 @@
>  #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
>  
>  
> -/* Hyp System Control Register (SCTLR_EL2) bits */
> -#define SCTLR_EL2_EE	(1 << 25)
> -#define SCTLR_EL2_WXN	(1 << 19)
> -#define SCTLR_EL2_I	(1 << 12)
> -#define SCTLR_EL2_SA	(1 << 3)
> -#define SCTLR_EL2_C	(1 << 2)
> -#define SCTLR_EL2_A	(1 << 1)
> -#define SCTLR_EL2_M	1
> -#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
> -			 SCTLR_EL2_SA | SCTLR_EL2_I)

SCTLR_EL2_FLAGS is a KVM-specific value (i.e. the SCTLR_EL2 flags which
KVM wants to set), even if it consists solely of common fields.

I believe it should stay here (with an include for <asm/sysreg.h>),
perhaps with a KVM_ prefix to imply it's not as generic as one might
assume it is.

> -
>  /* TCR_EL2 Registers bits */
>  #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
>  #define TCR_EL2_TBI	(1 << 20)
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index d48ab5b..109d46e 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -80,10 +80,21 @@
>  #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
>  				     (!!x)<<8 | 0x1f)
>  
> -/* SCTLR_EL1 */
> -#define SCTLR_EL1_CP15BEN	(0x1 << 5)
> -#define SCTLR_EL1_SED		(0x1 << 8)
> -#define SCTLR_EL1_SPAN		(0x1 << 23)
> +/* Common SCTLR_ELx flags. */
> +#define SCTLR_ELx_EE    (1 << 25)
> +#define SCTLR_ELx_I	(1 << 12)
> +#define SCTLR_ELx_SA	(1 << 3)
> +#define SCTLR_ELx_C	(1 << 2)
> +#define SCTLR_ELx_A	(1 << 1)
> +#define SCTLR_ELx_M	1

For consistency, (1 << 0) would be preferable.

> +
> +#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
> +			 SCTLR_ELx_SA | SCTLR_ELx_I)
> +
> +/* SCTLR_EL1 specific flags. */
> +#define SCTLR_EL1_SPAN		(1 << 23)
> +#define SCTLR_EL1_SED		(1 << 8)
> +#define SCTLR_EL1_CP15BEN	(1 << 5)
>  
>  
>  /* id_aa64isar0 */
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 178ba22..1d7e502 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -20,7 +20,7 @@
>  #include <asm/assembler.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> -#include <asm/pgtable-hwdef.h>
> +#include <asm/sysreg.h>
>  
>  	.text
>  	.pushsection	.hyp.idmap.text, "ax"
> @@ -105,8 +105,8 @@ __do_hyp_init:
>  	dsb	sy
>  
>  	mrs	x4, sctlr_el2
> -	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
> -	ldr	x5, =SCTLR_EL2_FLAGS
> +	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
> +	ldr	x5, =SCTLR_ELx_FLAGS

Marc, Christoffer, I note that in SCTLR_EL2_FLAGS we don't set the RES1
bits of SCTLR_EL2 (nor in head.S el2_setup). Should we perhaps be doing
so, so as to avoid any future surprises?

Thanks,
Mark.

>  	orr	x4, x4, x5
>  	msr	sctlr_el2, x4
>  	isb
> -- 
> 2.5.0
> 
> 

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-15 20:16     ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-15 20:16 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> This patch adds arch specific descriptions about kdump usage on arm64
> to kdump.txt.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> index bc4bd5a..36cf978 100644
> --- a/Documentation/kdump/kdump.txt
> +++ b/Documentation/kdump/kdump.txt
> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>  a remote system.
>  
>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> -s390x and arm architectures.
> +s390x, arm and arm64 architectures.
>  
>  When the system kernel boots, it reserves a small section of memory for
>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>  
>      AUTO_ZRELADDR=y
>  
> +Dump-capture kernel config options (Arch Dependent, arm64)
> +----------------------------------------------------------
> +
> +1) The maximum memory size on the dump-capture kernel must be limited by
> +   specifying:
> +
> +   mem=X[MG]
> +
> +   where X should be less than or equal to the size in "crashkernel="
> +   boot parameter. Kexec-tools will automatically add this.


This is extremely fragile, and will trivially fail when the kernel can
be loaded anywhere (see [1]).

We must explicitly describe the set of regions the crash kernel may use
(i.e. we need base and size). NAK in the absence of that.

Thanks,
Mark.

> +
> +2) Currently, kvm will not be enabled on the dump-capture kernel even
> +   if it is configured.
> +
>  Extended crashkernel syntax
>  ===========================
>  
> @@ -312,6 +326,8 @@ Boot into System Kernel
>     any space below the alignment point may be overwritten by the dump-capture kernel,
>     which means it is possible that the vmcore is not that precise as expected.
>  
> +   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
> +   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
>  
>  Load the Dump-capture Kernel
>  ============================
> @@ -334,6 +350,8 @@ For s390x:
>  	- Use image or bzImage
>  For arm:
>  	- Use zImage
> +For arm64:
> +	- Use vmlinux or Image
>  
>  If you are using a uncompressed vmlinux image then use following command
>  to load dump-capture kernel.
> @@ -377,6 +395,9 @@ For s390x:
>  For arm:
>  	"1 maxcpus=1 reset_devices"
>  
> +For arm64:
> +	"1 mem=X[MG] maxcpus=1 reset_devices"
> +
>  Notes on loading the dump-capture kernel:
>  
>  * By default, the ELF headers are stored in ELF64 format to support
> -- 
> 2.5.0
> 
> 

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/398527.html

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-15 20:16     ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-15 20:16 UTC (permalink / raw)
  To: Geoff Levand
  Cc: ard.biesheuvel, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, linux-arm-kernel, kexec,
	christoffer.dall

On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> This patch adds arch specific descriptions about kdump usage on arm64
> to kdump.txt.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> index bc4bd5a..36cf978 100644
> --- a/Documentation/kdump/kdump.txt
> +++ b/Documentation/kdump/kdump.txt
> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>  a remote system.
>  
>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> -s390x and arm architectures.
> +s390x, arm and arm64 architectures.
>  
>  When the system kernel boots, it reserves a small section of memory for
>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>  
>      AUTO_ZRELADDR=y
>  
> +Dump-capture kernel config options (Arch Dependent, arm64)
> +----------------------------------------------------------
> +
> +1) The maximum memory size on the dump-capture kernel must be limited by
> +   specifying:
> +
> +   mem=X[MG]
> +
> +   where X should be less than or equal to the size in "crashkernel="
> +   boot parameter. Kexec-tools will automatically add this.


This is extremely fragile, and will trivially fail when the kernel can
be loaded anywhere (see [1]).

We must explicitly describe the set of regions the crash kernel may use
(i.e. we need base and size). NAK in the absence of that.

Thanks,
Mark.

> +
> +2) Currently, kvm will not be enabled on the dump-capture kernel even
> +   if it is configured.
> +
>  Extended crashkernel syntax
>  ===========================
>  
> @@ -312,6 +326,8 @@ Boot into System Kernel
>     any space below the alignment point may be overwritten by the dump-capture kernel,
>     which means it is possible that the vmcore is not that precise as expected.
>  
> +   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
> +   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
>  
>  Load the Dump-capture Kernel
>  ============================
> @@ -334,6 +350,8 @@ For s390x:
>  	- Use image or bzImage
>  For arm:
>  	- Use zImage
> +For arm64:
> +	- Use vmlinux or Image
>  
>  If you are using a uncompressed vmlinux image then use following command
>  to load dump-capture kernel.
> @@ -377,6 +395,9 @@ For s390x:
>  For arm:
>  	"1 maxcpus=1 reset_devices"
>  
> +For arm64:
> +	"1 mem=X[MG] maxcpus=1 reset_devices"
> +
>  Notes on loading the dump-capture kernel:
>  
>  * By default, the ELF headers are stored in ELF64 format to support
> -- 
> 2.5.0
> 
> 

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/398527.html

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 02/19] arm64: kernel: Include _AC definition in page.h
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-18 10:05     ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-18 10:05 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> From: James Morse <james.morse@arm.com>
> 
> From: James Morse <james.morse@arm.com>

Not sure why this appears multiple times.

> page.h uses '_AC' in the definition of PAGE_SIZE, but doesn't include
> linux/const.h where this is defined. This produces build warnings when only
> asm/page.h is included by asm code.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> Acked-by: Pavel Machek <pavel@ucw.cz>
> Signed-off-by: Geoff Levand <geoff@infradead.org>

This is sensible even in isolation, so FWIW:

Acked-by: Mark Rutland <mark.rutland@arm.com>

I note that for the !__ASSEMBLY__ portion we use current,
READ_IMPLIES_EXEC, and some VM_* flags, without including the headers
those are defined in. It might be worth fixing those up also.

Mark.

> ---
>  arch/arm64/include/asm/page.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 9b2f5a9..fbafd0a 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -19,6 +19,8 @@
>  #ifndef __ASM_PAGE_H
>  #define __ASM_PAGE_H
>  
> +#include <linux/const.h>
> +
>  /* PAGE_SHIFT determines the page size */
>  /* CONT_SHIFT determines the number of pages which can be tracked together  */
>  #ifdef CONFIG_ARM64_64K_PAGES
> -- 
> 2.5.0
> 
> 

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 02/19] arm64: kernel: Include _AC definition in page.h
@ 2016-01-18 10:05     ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-18 10:05 UTC (permalink / raw)
  To: Geoff Levand, James Morse
  Cc: marc.zyngier, Catalin Marinas, Will Deacon, AKASHI Takahiro,
	linux-arm-kernel, kexec, christoffer.dall

On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> From: James Morse <james.morse@arm.com>
> 
> From: James Morse <james.morse@arm.com>

Not sure why this appears multiple times.

> page.h uses '_AC' in the definition of PAGE_SIZE, but doesn't include
> linux/const.h where this is defined. This produces build warnings when only
> asm/page.h is included by asm code.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> Acked-by: Pavel Machek <pavel@ucw.cz>
> Signed-off-by: Geoff Levand <geoff@infradead.org>

This is sensible even in isolation, so FWIW:

Acked-by: Mark Rutland <mark.rutland@arm.com>

I note that for the !__ASSEMBLY__ portion we use current,
READ_IMPLIES_EXEC, and some VM_* flags, without including the headers
those are defined in. It might be worth fixing those up also.

Mark.

> ---
>  arch/arm64/include/asm/page.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 9b2f5a9..fbafd0a 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -19,6 +19,8 @@
>  #ifndef __ASM_PAGE_H
>  #define __ASM_PAGE_H
>  
> +#include <linux/const.h>
> +
>  /* PAGE_SHIFT determines the page size */
>  /* CONT_SHIFT determines the number of pages which can be tracked together  */
>  #ifdef CONFIG_ARM64_64K_PAGES
> -- 
> 2.5.0
> 
> 

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 04/19] arm64: Cleanup SCTLR flags
  2016-01-15 20:07     ` Mark Rutland
@ 2016-01-18 10:12       ` Marc Zyngier
  -1 siblings, 0 replies; 174+ messages in thread
From: Marc Zyngier @ 2016-01-18 10:12 UTC (permalink / raw)
  To: linux-arm-kernel

On 15/01/16 20:07, Mark Rutland wrote:
> [Adding Marc as this touches KVM code]
> 
> On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
>> We currently have macros defining flags for the arm64 sctlr registers in both
>> kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
>> of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
>> SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
>> indicating a common flag, and fixup all files to include the proper header or
>> to use the new macro names.
> 
> I am certainly in favour of having consistently named and located macros
> for register fields.
> 
>> Signed-off-by: Geoff Levand <geoff@infradead.org>
>> ---
>>  arch/arm64/include/asm/kvm_arm.h | 11 -----------
>>  arch/arm64/include/asm/sysreg.h  | 19 +++++++++++++++----
>>  arch/arm64/kvm/hyp-init.S        |  6 +++---
>>  3 files changed, 18 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
>> index 5e6857b..92ef6f6 100644
>> --- a/arch/arm64/include/asm/kvm_arm.h
>> +++ b/arch/arm64/include/asm/kvm_arm.h
>> @@ -83,17 +83,6 @@
>>  #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
>>  
>>  
>> -/* Hyp System Control Register (SCTLR_EL2) bits */
>> -#define SCTLR_EL2_EE	(1 << 25)
>> -#define SCTLR_EL2_WXN	(1 << 19)
>> -#define SCTLR_EL2_I	(1 << 12)
>> -#define SCTLR_EL2_SA	(1 << 3)
>> -#define SCTLR_EL2_C	(1 << 2)
>> -#define SCTLR_EL2_A	(1 << 1)
>> -#define SCTLR_EL2_M	1
>> -#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
>> -			 SCTLR_EL2_SA | SCTLR_EL2_I)
> 
> SCTLR_EL2_FLAGS is a KVM-specific value (i.e. the SCTLR_EL2 flags which
> KVM wants to set), even if it consists solely of common fields.
> 
> I believe it should stay here (with an include for <asm/sysreg.h>),
> perhaps with a KVM_ prefix to imply it's not as generic as one might
> assume it is.
> 
>> -
>>  /* TCR_EL2 Registers bits */
>>  #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
>>  #define TCR_EL2_TBI	(1 << 20)
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index d48ab5b..109d46e 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -80,10 +80,21 @@
>>  #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
>>  				     (!!x)<<8 | 0x1f)
>>  
>> -/* SCTLR_EL1 */
>> -#define SCTLR_EL1_CP15BEN	(0x1 << 5)
>> -#define SCTLR_EL1_SED		(0x1 << 8)
>> -#define SCTLR_EL1_SPAN		(0x1 << 23)
>> +/* Common SCTLR_ELx flags. */
>> +#define SCTLR_ELx_EE    (1 << 25)
>> +#define SCTLR_ELx_I	(1 << 12)
>> +#define SCTLR_ELx_SA	(1 << 3)
>> +#define SCTLR_ELx_C	(1 << 2)
>> +#define SCTLR_ELx_A	(1 << 1)
>> +#define SCTLR_ELx_M	1
> 
> For consistency, (1 << 0) would be preferable.
> 
>> +
>> +#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
>> +			 SCTLR_ELx_SA | SCTLR_ELx_I)
>> +
>> +/* SCTLR_EL1 specific flags. */
>> +#define SCTLR_EL1_SPAN		(1 << 23)
>> +#define SCTLR_EL1_SED		(1 << 8)
>> +#define SCTLR_EL1_CP15BEN	(1 << 5)
>>  
>>  
>>  /* id_aa64isar0 */
>> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
>> index 178ba22..1d7e502 100644
>> --- a/arch/arm64/kvm/hyp-init.S
>> +++ b/arch/arm64/kvm/hyp-init.S
>> @@ -20,7 +20,7 @@
>>  #include <asm/assembler.h>
>>  #include <asm/kvm_arm.h>
>>  #include <asm/kvm_mmu.h>
>> -#include <asm/pgtable-hwdef.h>
>> +#include <asm/sysreg.h>
>>  
>>  	.text
>>  	.pushsection	.hyp.idmap.text, "ax"
>> @@ -105,8 +105,8 @@ __do_hyp_init:
>>  	dsb	sy
>>  
>>  	mrs	x4, sctlr_el2
>> -	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
>> -	ldr	x5, =SCTLR_EL2_FLAGS
>> +	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
>> +	ldr	x5, =SCTLR_ELx_FLAGS
> 
> Marc, Christoffer, I note that in SCTLR_EL2_FLAGS we don't set the RES1
> bits of SCTLR_EL2 (not in head.S el2_setup). Should we perhaps be doing
> so so as to avoid any future surprises?

Yes, that's one of the numerous instances of the same problem - I think
Dave Martin also has some fixes in that area.

I'll definitely take patches!

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 04/19] arm64: Cleanup SCTLR flags
@ 2016-01-18 10:12       ` Marc Zyngier
  0 siblings, 0 replies; 174+ messages in thread
From: Marc Zyngier @ 2016-01-18 10:12 UTC (permalink / raw)
  To: Mark Rutland, Geoff Levand, christoffer.dall
  Cc: Catalin Marinas, Will Deacon, AKASHI Takahiro, James Morse,
	kexec, linux-arm-kernel

On 15/01/16 20:07, Mark Rutland wrote:
> [Adding Marc as this touches KVM code]
> 
> On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
>> We currently have macros defining flags for the arm64 sctlr registers in both
>> kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
>> of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
>> SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
>> indicating a common flag, and fixup all files to include the proper header or
>> to use the new macro names.
> 
> I am certainly in favour of having consistently named and located macros
> for register fields.
> 
>> Signed-off-by: Geoff Levand <geoff@infradead.org>
>> ---
>>  arch/arm64/include/asm/kvm_arm.h | 11 -----------
>>  arch/arm64/include/asm/sysreg.h  | 19 +++++++++++++++----
>>  arch/arm64/kvm/hyp-init.S        |  6 +++---
>>  3 files changed, 18 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
>> index 5e6857b..92ef6f6 100644
>> --- a/arch/arm64/include/asm/kvm_arm.h
>> +++ b/arch/arm64/include/asm/kvm_arm.h
>> @@ -83,17 +83,6 @@
>>  #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
>>  
>>  
>> -/* Hyp System Control Register (SCTLR_EL2) bits */
>> -#define SCTLR_EL2_EE	(1 << 25)
>> -#define SCTLR_EL2_WXN	(1 << 19)
>> -#define SCTLR_EL2_I	(1 << 12)
>> -#define SCTLR_EL2_SA	(1 << 3)
>> -#define SCTLR_EL2_C	(1 << 2)
>> -#define SCTLR_EL2_A	(1 << 1)
>> -#define SCTLR_EL2_M	1
>> -#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
>> -			 SCTLR_EL2_SA | SCTLR_EL2_I)
> 
> SCTLR_EL2_FLAGS is a KVM-specific value (i.e. the SCTLR_EL2 flags which
> KVM wants to set), even if it consists solely of common fields.
> 
> I believe it should stay here (with an include for <asm/sysreg.h>),
> perhaps with a KVM_ prefix to imply it's not as generic as one might
> assume it is.
> 
>> -
>>  /* TCR_EL2 Registers bits */
>>  #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
>>  #define TCR_EL2_TBI	(1 << 20)
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index d48ab5b..109d46e 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -80,10 +80,21 @@
>>  #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
>>  				     (!!x)<<8 | 0x1f)
>>  
>> -/* SCTLR_EL1 */
>> -#define SCTLR_EL1_CP15BEN	(0x1 << 5)
>> -#define SCTLR_EL1_SED		(0x1 << 8)
>> -#define SCTLR_EL1_SPAN		(0x1 << 23)
>> +/* Common SCTLR_ELx flags. */
>> +#define SCTLR_ELx_EE    (1 << 25)
>> +#define SCTLR_ELx_I	(1 << 12)
>> +#define SCTLR_ELx_SA	(1 << 3)
>> +#define SCTLR_ELx_C	(1 << 2)
>> +#define SCTLR_ELx_A	(1 << 1)
>> +#define SCTLR_ELx_M	1
> 
> For consistency, (1 << 0) would be preferable.
> 
>> +
>> +#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
>> +			 SCTLR_ELx_SA | SCTLR_ELx_I)
>> +
>> +/* SCTLR_EL1 specific flags. */
>> +#define SCTLR_EL1_SPAN		(1 << 23)
>> +#define SCTLR_EL1_SED		(1 << 8)
>> +#define SCTLR_EL1_CP15BEN	(1 << 5)
>>  
>>  
>>  /* id_aa64isar0 */
>> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
>> index 178ba22..1d7e502 100644
>> --- a/arch/arm64/kvm/hyp-init.S
>> +++ b/arch/arm64/kvm/hyp-init.S
>> @@ -20,7 +20,7 @@
>>  #include <asm/assembler.h>
>>  #include <asm/kvm_arm.h>
>>  #include <asm/kvm_mmu.h>
>> -#include <asm/pgtable-hwdef.h>
>> +#include <asm/sysreg.h>
>>  
>>  	.text
>>  	.pushsection	.hyp.idmap.text, "ax"
>> @@ -105,8 +105,8 @@ __do_hyp_init:
>>  	dsb	sy
>>  
>>  	mrs	x4, sctlr_el2
>> -	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
>> -	ldr	x5, =SCTLR_EL2_FLAGS
>> +	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
>> +	ldr	x5, =SCTLR_ELx_FLAGS
> 
> Marc, Christoffer, I note that in SCTLR_EL2_FLAGS we don't set the RES1
> bits of SCTLR_EL2 (not in head.S el2_setup). Should we perhaps be doing
> so so as to avoid any future surprises?

Yes, that's one of the numerous instances of the same problem - I think
Dave Martin also has some fixes in that area.

I'll definitely take patches!

	M.
-- 
Jazz is not dead. It just smells funny...

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-15 20:16     ` Mark Rutland
@ 2016-01-18 10:26       ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-18 10:26 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/16/2016 05:16 AM, Mark Rutland wrote:
> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> This patch adds arch specific descriptions about kdump usage on arm64
>> to kdump.txt.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>> index bc4bd5a..36cf978 100644
>> --- a/Documentation/kdump/kdump.txt
>> +++ b/Documentation/kdump/kdump.txt
>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>   a remote system.
>>
>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>> -s390x and arm architectures.
>> +s390x, arm and arm64 architectures.
>>
>>   When the system kernel boots, it reserves a small section of memory for
>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>
>>       AUTO_ZRELADDR=y
>>
>> +Dump-capture kernel config options (Arch Dependent, arm64)
>> +----------------------------------------------------------
>> +
>> +1) The maximum memory size on the dump-capture kernel must be limited by
>> +   specifying:
>> +
>> +   mem=X[MG]
>> +
>> +   where X should be less than or equal to the size in "crashkernel="
>> +   boot parameter. Kexec-tools will automatically add this.
>
>
> This is extremely fragile, and will trivially fail when the kernel can
> be loaded anywhere (see [1]).

As I said before, this restriction also exists on arm, but I understand
that Ard's recent patches break it.

> We must explicitly describe the set of regions the crash kernel may use
> (i.e. we need base and size). NAK in the absence of that.

There seem to exist several approaches:
(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
     under "memory" node
(b) use a kernel's early parameter, "memmap=nn[@#$]ss"

PowerPC takes (a), but this does not work on an EFI-booted kernel
because the dtb has no "memory" nodes under EFI.
x86 takes (b). If we take this approach, we will need to override the weak
early_init_dt_add_memory().
(I thought that this approach was not smart, as we would have three different
ways to specify memory regions: dtb, EFI and this kernel parameter.)

Do you have any other ideas?

Thanks,
-Takahiro AKASHI


> Thanks,
> Mark.
>
>> +
>> +2) Currently, kvm will not be enabled on the dump-capture kernel even
>> +   if it is configured.
>> +
>>   Extended crashkernel syntax
>>   ===========================
>>
>> @@ -312,6 +326,8 @@ Boot into System Kernel
>>      any space below the alignment point may be overwritten by the dump-capture kernel,
>>      which means it is possible that the vmcore is not that precise as expected.
>>
>> +   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
>> +   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
>>
>>   Load the Dump-capture Kernel
>>   ============================
>> @@ -334,6 +350,8 @@ For s390x:
>>   	- Use image or bzImage
>>   For arm:
>>   	- Use zImage
>> +For arm64:
>> +	- Use vmlinux or Image
>>
>>   If you are using a uncompressed vmlinux image then use following command
>>   to load dump-capture kernel.
>> @@ -377,6 +395,9 @@ For s390x:
>>   For arm:
>>   	"1 maxcpus=1 reset_devices"
>>
>> +For arm64:
>> +	"1 mem=X[MG] maxcpus=1 reset_devices"
>> +
>>   Notes on loading the dump-capture kernel:
>>
>>   * By default, the ELF headers are stored in ELF64 format to support
>> --
>> 2.5.0
>>
>>
>
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/398527.html
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-18 10:26       ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-18 10:26 UTC (permalink / raw)
  To: Mark Rutland, Geoff Levand
  Cc: ard.biesheuvel, marc.zyngier, Catalin Marinas, Will Deacon,
	James Morse, linux-arm-kernel, kexec, christoffer.dall

On 01/16/2016 05:16 AM, Mark Rutland wrote:
> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> This patch adds arch specific descriptions about kdump usage on arm64
>> to kdump.txt.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>> index bc4bd5a..36cf978 100644
>> --- a/Documentation/kdump/kdump.txt
>> +++ b/Documentation/kdump/kdump.txt
>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>   a remote system.
>>
>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>> -s390x and arm architectures.
>> +s390x, arm and arm64 architectures.
>>
>>   When the system kernel boots, it reserves a small section of memory for
>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>
>>       AUTO_ZRELADDR=y
>>
>> +Dump-capture kernel config options (Arch Dependent, arm64)
>> +----------------------------------------------------------
>> +
>> +1) The maximum memory size on the dump-capture kernel must be limited by
>> +   specifying:
>> +
>> +   mem=X[MG]
>> +
>> +   where X should be less than or equal to the size in "crashkernel="
>> +   boot parameter. Kexec-tools will automatically add this.
>
>
> This is extremely fragile, and will trivially fail when the kernel can
> be loaded anywhere (see [1]).

As I said before, this restriction also exists on arm, but I understand
that Ard's recent patches break it.

> We must explicitly describe the set of regions the crash kernel may use
> (i.e. we need base and size). NAK in the absence of that.

There seem to exist several approaches:
(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
     under "memory" node
(b) use a kernel's early parameter, "memmap=nn[@#$]ss"

PowerPC takes (a), but this does not work on an EFI-booted kernel
because the dtb has no "memory" nodes under EFI.
x86 takes (b). If we take this approach, we will need to override the weak
early_init_dt_add_memory().
(I thought that this approach was not smart, as we would have three different
ways to specify memory regions: dtb, EFI and this kernel parameter.)

Do you have any other ideas?

Thanks,
-Takahiro AKASHI


> Thanks,
> Mark.
>
>> +
>> +2) Currently, kvm will not be enabled on the dump-capture kernel even
>> +   if it is configured.
>> +
>>   Extended crashkernel syntax
>>   ===========================
>>
>> @@ -312,6 +326,8 @@ Boot into System Kernel
>>      any space below the alignment point may be overwritten by the dump-capture kernel,
>>      which means it is possible that the vmcore is not that precise as expected.
>>
>> +   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
>> +   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
>>
>>   Load the Dump-capture Kernel
>>   ============================
>> @@ -334,6 +350,8 @@ For s390x:
>>   	- Use image or bzImage
>>   For arm:
>>   	- Use zImage
>> +For arm64:
>> +	- Use vmlinux or Image
>>
>>   If you are using a uncompressed vmlinux image then use following command
>>   to load dump-capture kernel.
>> @@ -377,6 +395,9 @@ For s390x:
>>   For arm:
>>   	"1 maxcpus=1 reset_devices"
>>
>> +For arm64:
>> +	"1 mem=X[MG] maxcpus=1 reset_devices"
>> +
>>   Notes on loading the dump-capture kernel:
>>
>>   * By default, the ELF headers are stored in ELF64 format to support
>> --
>> 2.5.0
>>
>>
>
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/398527.html
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-18 10:26       ` AKASHI Takahiro
@ 2016-01-18 11:29         ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-18 11:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
> On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>
> >>This patch adds arch specific descriptions about kdump usage on arm64
> >>to kdump.txt.
> >>
> >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>---
> >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>index bc4bd5a..36cf978 100644
> >>--- a/Documentation/kdump/kdump.txt
> >>+++ b/Documentation/kdump/kdump.txt
> >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>  a remote system.
> >>
> >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>-s390x and arm architectures.
> >>+s390x, arm and arm64 architectures.
> >>
> >>  When the system kernel boots, it reserves a small section of memory for
> >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>
> >>      AUTO_ZRELADDR=y
> >>
> >>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>+----------------------------------------------------------
> >>+
> >>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>+   specifying:
> >>+
> >>+   mem=X[MG]
> >>+
> >>+   where X should be less than or equal to the size in "crashkernel="
> >>+   boot parameter. Kexec-tools will automatically add this.
> >
> >
> >This is extremely fragile, and will trivially fail when the kernel can
> >be loaded anywhere (see [1]).
> 
> As I said before, this restriction also exists on arm, but I understand
> that recent Ard's patches break it.
> 
> >We must explicitly describe the set of regions the crash kernel may use
> >(i.e. we need base and size). NAK in the absence of that.
> 
> There seem to exist several approaches:
> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"

I'm not opposed to the idea of a DT property, though I think that should
live under /chosen.

I see that "linux,usable-memory" exists already, though I'm confused as
to exactly what it is for as there is no documentation (neither in the
kernel nor in ePAPR). It's also painful to alter multiple memory nodes
to use that, and I can see that going wrong.

>     under "memory" node
> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"

I'm not too keen on this, as I think it's fragile, and logically
somewhat distinct from what mem= is for (a best effort testing tool).

> Power PC takes (a), while this does not work on efi-started kernel
> because dtb has no "memory" nodes under efi.

A property under /chosen would work for EFI too.

> X86 takes (b). If we take this, we will need to overwrite a weak
> early_init_dt_add_memory().
> (I thought that this approach was not smart as we have three different
> ways to specify memory regions, dtb, efi and this kernel parameter.)

I'm not sure that's a big problem. We may be able to make this generic,
also.

We don't necessarily need a weak add memory function if we can guarantee
nothing gets memblock_alloc'd before we carve it out.

Something like the nomap stuff Ard put together might be useful here.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-18 11:29         ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-18 11:29 UTC (permalink / raw)
  To: AKASHI Takahiro, ard.biesheuvel
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, marc.zyngier,
	James Morse, linux-arm-kernel, kexec, christoffer.dall

On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
> On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>
> >>This patch adds arch specific descriptions about kdump usage on arm64
> >>to kdump.txt.
> >>
> >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>---
> >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>index bc4bd5a..36cf978 100644
> >>--- a/Documentation/kdump/kdump.txt
> >>+++ b/Documentation/kdump/kdump.txt
> >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>  a remote system.
> >>
> >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>-s390x and arm architectures.
> >>+s390x, arm and arm64 architectures.
> >>
> >>  When the system kernel boots, it reserves a small section of memory for
> >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>
> >>      AUTO_ZRELADDR=y
> >>
> >>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>+----------------------------------------------------------
> >>+
> >>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>+   specifying:
> >>+
> >>+   mem=X[MG]
> >>+
> >>+   where X should be less than or equal to the size in "crashkernel="
> >>+   boot parameter. Kexec-tools will automatically add this.
> >
> >
> >This is extremely fragile, and will trivially fail when the kernel can
> >be loaded anywhere (see [1]).
> 
> As I said before, this restriction also exists on arm, but I understand
> that recent Ard's patches break it.
> 
> >We must explicitly describe the set of regions the crash kernel may use
> >(i.e. we need base and size). NAK in the absence of that.
> 
> There seem to exist several approaches:
> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"

I'm not opposed to the idea of a DT property, though I think that should
live under /chosen.

I see that "linux,usable-memory" exists already, though I'm confused as
to exactly what it is for as there is no documentation (neither in the
kernel nor in ePAPR). It's also painful to alter multiple memory nodes
to use that, and I can see that going wrong.

>     under "memory" node
> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"

I'm not too keen on this, as I think it's fragile, and logically
somewhat distinct from what mem= is for (a best effort testing tool).

> Power PC takes (a), while this does not work on efi-started kernel
> because dtb has no "memory" nodes under efi.

A property under /chosen would work for EFI too.

> X86 takes (b). If we take this, we will need to overwrite a weak
> early_init_dt_add_memory().
> (I thought that this approach was not smart as we have three different
> ways to specify memory regions, dtb, efi and this kernel parameter.)

I'm not sure that's a big problem. We may be able to make this generic,
also.

We don't necessarily need a weak add memory function if we can guarantee
nothing gets memblock_alloc'd before we carve it out.

Something like the nomap stuff Ard put together might be useful here.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-18 10:26       ` AKASHI Takahiro
@ 2016-01-19  1:43         ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19  1:43 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>
> >>This patch adds arch specific descriptions about kdump usage on arm64
> >>to kdump.txt.
> >>
> >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>---
> >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>index bc4bd5a..36cf978 100644
> >>--- a/Documentation/kdump/kdump.txt
> >>+++ b/Documentation/kdump/kdump.txt
> >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>  a remote system.
> >>
> >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>-s390x and arm architectures.
> >>+s390x, arm and arm64 architectures.
> >>
> >>  When the system kernel boots, it reserves a small section of memory for
> >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>
> >>      AUTO_ZRELADDR=y
> >>
> >>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>+----------------------------------------------------------
> >>+
> >>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>+   specifying:
> >>+
> >>+   mem=X[MG]
> >>+
> >>+   where X should be less than or equal to the size in "crashkernel="
> >>+   boot parameter. Kexec-tools will automatically add this.
> >
> >
> >This is extremely fragile, and will trivially fail when the kernel can
> >be loaded anywhere (see [1]).
> 
> As I said before, this restriction also exists on arm, but I understand
> that recent Ard's patches break it.
> 
> >We must explicitly describe the set of regions the crash kernel may use
> >(i.e. we need base and size). NAK in the absence of that.
> 
> There seem to exist several approaches:
> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>     under "memory" node
> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> 
> Power PC takes (a), while this does not work on efi-started kernel
> because dtb has no "memory" nodes under efi.
> X86 takes (b). If we take this, we will need to overwrite a weak
> early_init_dt_add_memory().

X86 takes a different approach in the latest kexec-tools and in
kexec_file_load: it recreates the E820 table and passes it to the kexec/kdump
kernel. If there are more entries than the E820 table can hold, the remaining
entries are passed via the setup_data list.

I think it is X86 specific. Personally I think device tree property is
better.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19  1:43         ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19  1:43 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>
> >>This patch adds arch specific descriptions about kdump usage on arm64
> >>to kdump.txt.
> >>
> >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>---
> >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>index bc4bd5a..36cf978 100644
> >>--- a/Documentation/kdump/kdump.txt
> >>+++ b/Documentation/kdump/kdump.txt
> >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>  a remote system.
> >>
> >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>-s390x and arm architectures.
> >>+s390x, arm and arm64 architectures.
> >>
> >>  When the system kernel boots, it reserves a small section of memory for
> >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>
> >>      AUTO_ZRELADDR=y
> >>
> >>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>+----------------------------------------------------------
> >>+
> >>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>+   specifying:
> >>+
> >>+   mem=X[MG]
> >>+
> >>+   where X should be less than or equal to the size in "crashkernel="
> >>+   boot parameter. Kexec-tools will automatically add this.
> >
> >
> >This is extremely fragile, and will trivially fail when the kernel can
> >be loaded anywhere (see [1]).
> 
> As I said before, this restriction also exists on arm, but I understand
> that recent Ard's patches break it.
> 
> >We must explicitly describe the set of regions the crash kernel may use
> >(i.e. we need base and size). NAK in the absence of that.
> 
> There seem to exist several approaches:
> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>     under "memory" node
> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> 
> Power PC takes (a), while this does not work on efi-started kernel
> because dtb has no "memory" nodes under efi.
> X86 takes (b). If we take this, we will need to overwrite a weak
> early_init_dt_add_memory().

X86 takes a different approach in the latest kexec-tools and in
kexec_file_load: it recreates the E820 table and passes it to the kexec/kdump
kernel. If there are more entries than the E820 table can hold, the remaining
entries are passed via the setup_data list.

I think it is X86 specific. Personally I think device tree property is
better.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19  1:43         ` Dave Young
@ 2016-01-19  1:50           ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19  1:50 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 09:43am, Dave Young wrote:
> On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>
> > >>This patch adds arch specific descriptions about kdump usage on arm64
> > >>to kdump.txt.
> > >>
> > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>---
> > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > >>
> > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > >>index bc4bd5a..36cf978 100644
> > >>--- a/Documentation/kdump/kdump.txt
> > >>+++ b/Documentation/kdump/kdump.txt
> > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > >>  a remote system.
> > >>
> > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > >>-s390x and arm architectures.
> > >>+s390x, arm and arm64 architectures.
> > >>
> > >>  When the system kernel boots, it reserves a small section of memory for
> > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > >>
> > >>      AUTO_ZRELADDR=y
> > >>
> > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > >>+----------------------------------------------------------
> > >>+
> > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > >>+   specifying:
> > >>+
> > >>+   mem=X[MG]
> > >>+
> > >>+   where X should be less than or equal to the size in "crashkernel="
> > >>+   boot parameter. Kexec-tools will automatically add this.
> > >
> > >
> > >This is extremely fragile, and will trivially fail when the kernel can
> > >be loaded anywhere (see [1]).
> > 
> > As I said before, this restriction also exists on arm, but I understand
> > that recent Ard's patches break it.
> > 
> > >We must explicitly describe the set of regions the crash kernel may use
> > >(i.e. we need base and size). NAK in the absence of that.
> > 
> > There seem to exist several approaches:
> > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >     under "memory" node
> > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > 
> > Power PC takes (a), while this does not work on efi-started kernel
> > because dtb has no "memory" nodes under efi.
> > X86 takes (b). If we take this, we will need to overwrite a weak
> > early_init_dt_add_memory().
> 
> X86 takes another way in latest kexec-tools and kexec_file_load, that is
> recreating E820 table and pass it to kexec/kdump kernel, if the entries
> are over E820 limitation then turn to use setup_data list for remain
> entries.

One reason for the change is that kernel cmdline space is limited. We need
to pass not only usable memory ranges but also reserved and other kinds of
memory ranges, and the cmdline array is sometimes not big enough.

> 
> I think it is X86 specific. Personally I think device tree property is
> better.
> 
> Thanks
> Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19  1:50           ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19  1:50 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/19/16 at 09:43am, Dave Young wrote:
> On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>
> > >>This patch adds arch specific descriptions about kdump usage on arm64
> > >>to kdump.txt.
> > >>
> > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>---
> > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > >>
> > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > >>index bc4bd5a..36cf978 100644
> > >>--- a/Documentation/kdump/kdump.txt
> > >>+++ b/Documentation/kdump/kdump.txt
> > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > >>  a remote system.
> > >>
> > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > >>-s390x and arm architectures.
> > >>+s390x, arm and arm64 architectures.
> > >>
> > >>  When the system kernel boots, it reserves a small section of memory for
> > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > >>
> > >>      AUTO_ZRELADDR=y
> > >>
> > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > >>+----------------------------------------------------------
> > >>+
> > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > >>+   specifying:
> > >>+
> > >>+   mem=X[MG]
> > >>+
> > >>+   where X should be less than or equal to the size in "crashkernel="
> > >>+   boot parameter. Kexec-tools will automatically add this.
> > >
> > >
> > >This is extremely fragile, and will trivially fail when the kernel can
> > >be loaded anywhere (see [1]).
> > 
> > As I said before, this restriction also exists on arm, but I understand
> > that recent Ard's patches break it.
> > 
> > >We must explicitly describe the set of regions the crash kernel may use
> > >(i.e. we need base and size). NAK in the absence of that.
> > 
> > There seem to exist several approaches:
> > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >     under "memory" node
> > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > 
> > Power PC takes (a), while this does not work on efi-started kernel
> > because dtb has no "memory" nodes under efi.
> > X86 takes (b). If we take this, we will need to overwrite a weak
> > early_init_dt_add_memory().
> 
> X86 takes another way in latest kexec-tools and kexec_file_load, that is
> recreating E820 table and pass it to kexec/kdump kernel, if the entries
> are over E820 limitation then turn to use setup_data list for remain
> entries.

One reason for the change is that kernel cmdline space is limited. We need
to pass not only usable memory ranges but also reserved and other kinds of
memory ranges, and the cmdline array is sometimes not big enough.

> 
> I think it is X86 specific. Personally I think device tree property is
> better.
> 
> Thanks
> Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-18 11:29         ` Mark Rutland
@ 2016-01-19  5:31           ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-19  5:31 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/18/2016 08:29 PM, Mark Rutland wrote:
> On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
>> On 01/16/2016 05:16 AM, Mark Rutland wrote:
>>> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>
>>>> This patch adds arch specific descriptions about kdump usage on arm64
>>>> to kdump.txt.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>>>> index bc4bd5a..36cf978 100644
>>>> --- a/Documentation/kdump/kdump.txt
>>>> +++ b/Documentation/kdump/kdump.txt
>>>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>>>   a remote system.
>>>>
>>>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>>>> -s390x and arm architectures.
>>>> +s390x, arm and arm64 architectures.
>>>>
>>>>   When the system kernel boots, it reserves a small section of memory for
>>>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>>>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>>>
>>>>       AUTO_ZRELADDR=y
>>>>
>>>> +Dump-capture kernel config options (Arch Dependent, arm64)
>>>> +----------------------------------------------------------
>>>> +
>>>> +1) The maximum memory size on the dump-capture kernel must be limited by
>>>> +   specifying:
>>>> +
>>>> +   mem=X[MG]
>>>> +
>>>> +   where X should be less than or equal to the size in "crashkernel="
>>>> +   boot parameter. Kexec-tools will automatically add this.
>>>
>>>
>>> This is extremely fragile, and will trivially fail when the kernel can
>>> be loaded anywhere (see [1]).
>>
>> As I said before, this restriction also exists on arm, but I understand
>> that recent Ard's patches break it.
>>
>>> We must explicitly describe the set of regions the crash kernel may use
>>> (i.e. we need base and size). NAK in the absence of that.
>>
>> There seem to exist several approaches:
>> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>
> I'm not opposed to the idea of a DT property, though I think that should
> live under /chosen.

In fact, powerpc uses another property, "linux,crashkernel-base(& size)",
under /chosen in order for the *1st kernel* to export info about a memory
region for the 2nd(crash dump) kernel to user apps (kexec-tools).

> I see that "linux,usable-memory" exists already, though I'm confused as
> to exactly what it is for as there is no documentation (neither in the
> kernel nor in ePAPR).

For example,
   memory@0x80000000 {
     reg = <0x0 0x80000000 0x0 0x80000000>;
     linux,usable-memory = <0x0 0x8c000000 0x0 0x4000000>;
   }
There is 2GB of memory on the system, but only the 64MB region at 0x8c000000
can be used as system RAM. See early_init_dt_scan_memory() in fdt.c.

> It's also painful to alter multiple memory nodes
> to use that, and I can see that going wrong.

Yeah, I implemented this feature in my old versions experimentally,
but didn't like it as we had to touch all the memory nodes.

>>      under "memory" node
>> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
>
> I'm not too keen on this, as I think it's fragile, and logically
> somewhat distinct from what mem= is for (a best effort testing tool).

I'm not sure whether it is fragile, and contrary to x86, as Dave
described, I think we will only need a single memmap= on arm64 as
efi's mem map table is accessible even on the crash kernel.

>> Power PC takes (a), while this does not work on efi-started kernel
>> because dtb has no "memory" nodes under efi.
>
> A property under /chosen would work for EFI too.
>
>> X86 takes (b). If we take this, we will need to overwrite a weak
>> early_init_dt_add_memory().
>> (I thought that this approach was not smart as we have three different
>> ways to specify memory regions, dtb, efi and this kernel parameter.)
>
> I'm not sure that's a big problem. We may be able to make this generic,
> also.
>
> We don't necessarily need a weak add memory function if we can guarantee
> nothing gets memblock_alloc'd before we carve it out.
>
> Something like the nomap stuff Ard put together might be useful here.

I'm afraid it doesn't work.
It doesn't matter whether it is linearly mapped or not. We should prevent
any part of memory regions used by the 1st kernel from being reclaimed
by memblock_alloc() and others.
Or do you mean we can introduce another memblock flag?

-Takahiro AKASHI

> Thanks,
> Mark.
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19  5:31           ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-19  5:31 UTC (permalink / raw)
  To: Mark Rutland, ard.biesheuvel
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, marc.zyngier,
	James Morse, linux-arm-kernel, kexec, christoffer.dall

On 01/18/2016 08:29 PM, Mark Rutland wrote:
> On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
>> On 01/16/2016 05:16 AM, Mark Rutland wrote:
>>> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>
>>>> This patch adds arch specific descriptions about kdump usage on arm64
>>>> to kdump.txt.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>>>> index bc4bd5a..36cf978 100644
>>>> --- a/Documentation/kdump/kdump.txt
>>>> +++ b/Documentation/kdump/kdump.txt
>>>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>>>   a remote system.
>>>>
>>>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>>>> -s390x and arm architectures.
>>>> +s390x, arm and arm64 architectures.
>>>>
>>>>   When the system kernel boots, it reserves a small section of memory for
>>>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>>>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>>>
>>>>       AUTO_ZRELADDR=y
>>>>
>>>> +Dump-capture kernel config options (Arch Dependent, arm64)
>>>> +----------------------------------------------------------
>>>> +
>>>> +1) The maximum memory size on the dump-capture kernel must be limited by
>>>> +   specifying:
>>>> +
>>>> +   mem=X[MG]
>>>> +
>>>> +   where X should be less than or equal to the size in "crashkernel="
>>>> +   boot parameter. Kexec-tools will automatically add this.
>>>
>>>
>>> This is extremely fragile, and will trivially fail when the kernel can
>>> be loaded anywhere (see [1]).
>>
>> As I said before, this restriction also exists on arm, but I understand
>> that recent Ard's patches break it.
>>
>>> We must explicitly describe the set of regions the crash kernel may use
>>> (i.e. we need base and size). NAK in the absence of that.
>>
>> There seem to exist several approaches:
>> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>
> I'm not opposed to the idea of a DT property, though I think that should
> live under /chosen.

In fact, powerpc uses another property, "linux,crashkernel-base(& size)",
under /chosen in order for the *1st kernel* to export info about a memory
region for the 2nd(crash dump) kernel to user apps (kexec-tools).

> I see that "linux,usable-memory" exists already, though I'm confused as
> to exactly what it is for as there is no documentation (neither in the
> kernel nor in ePAPR).

For example,
   memory@0x80000000 {
     reg = <0x0 0x80000000 0x0 0x80000000>;
     linux,usable-memory = <0x0 0x8c000000 0x0 0x4000000>;
   }
There is 2GB of memory on the system, but only the 64MB region at 0x8c000000
can be used as system RAM. See early_init_dt_scan_memory() in fdt.c.

> It's also painful to alter multiple memory nodes
> to use that, and I can see that going wrong.

Yeah, I implemented this feature in my old versions experimentally,
but didn't like it as we had to touch all the memory nodes.

>>      under "memory" node
>> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
>
> I'm not too keen on this, as I think it's fragile, and logically
> somewhat distinct from what mem= is for (a best effort testing tool).

I'm not sure whether it is fragile, and contrary to x86, as Dave
described, I think we will only need a single memmap= on arm64 as
efi's mem map table is accessible even on the crash kernel.

>> Power PC takes (a), while this does not work on efi-started kernel
>> because dtb has no "memory" nodes under efi.
>
> A property under /chosen would work for EFI too.
>
>> X86 takes (b). If we take this, we will need to overwrite a weak
>> early_init_dt_add_memory().
>> (I thought that this approach was not smart as we have three different
>> ways to specify memory regions, dtb, efi and this kernel parameter.)
>
> I'm not sure that's a big problem. We may be able to make this generic,
> also.
>
> We don't necessarily need a weak add memory function if we can guarantee
> nothing gets memblock_alloc'd before we carve it out.
>
> Something like the nomap stuff Ard put together might be useful here.

I'm afraid it doesn't work.
It doesn't matter whether it is linearly mapped or not. We should prevent
any part of memory regions used by the 1st kernel from being reclaimed
by memblock_alloc() and others.
Or do you mean we can introduce another memblock flag?

-Takahiro AKASHI

> Thanks,
> Mark.
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19  1:43         ` Dave Young
@ 2016-01-19  5:35           ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-19  5:35 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/2016 10:43 AM, Dave Young wrote:
> On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
>> On 01/16/2016 05:16 AM, Mark Rutland wrote:
>>> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>
>>>> This patch adds arch specific descriptions about kdump usage on arm64
>>>> to kdump.txt.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>>>> index bc4bd5a..36cf978 100644
>>>> --- a/Documentation/kdump/kdump.txt
>>>> +++ b/Documentation/kdump/kdump.txt
>>>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>>>   a remote system.
>>>>
>>>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>>>> -s390x and arm architectures.
>>>> +s390x, arm and arm64 architectures.
>>>>
>>>>   When the system kernel boots, it reserves a small section of memory for
>>>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>>>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>>>
>>>>       AUTO_ZRELADDR=y
>>>>
>>>> +Dump-capture kernel config options (Arch Dependent, arm64)
>>>> +----------------------------------------------------------
>>>> +
>>>> +1) The maximum memory size on the dump-capture kernel must be limited by
>>>> +   specifying:
>>>> +
>>>> +   mem=X[MG]
>>>> +
>>>> +   where X should be less than or equal to the size in "crashkernel="
>>>> +   boot parameter. Kexec-tools will automatically add this.
>>>
>>>
>>> This is extremely fragile, and will trivially fail when the kernel can
>>> be loaded anywhere (see [1]).
>>
>> As I said before, this restriction also exists on arm, but I understand
>> that recent Ard's patches break it.
>>
>>> We must explicitly describe the set of regions the crash kernel may use
>>> (i.e. we need base and size). NAK in the absence of that.
>>
>> There seem to exist several approaches:
>> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>>      under "memory" node
>> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
>>
>> Power PC takes (a), while this does not work on efi-started kernel
>> because dtb has no "memory" nodes under efi.
>> X86 takes (b). If we take this, we will need to overwrite a weak
>> early_init_dt_add_memory().
>
> X86 takes another way in latest kexec-tools and kexec_file_load, that is
> recreating E820 table and pass it to kexec/kdump kernel, if the entries
> are over E820 limitation then turn to use setup_data list for remain
> entries.

Thanks. I will visit x86 code again.

> I think it is X86 specific. Personally I think device tree property is
> better.

Do you think so?

-Takahiro AKASHI


>
> Thanks
> Dave
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19  5:35           ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-19  5:35 UTC (permalink / raw)
  To: Dave Young
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/19/2016 10:43 AM, Dave Young wrote:
> On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
>> On 01/16/2016 05:16 AM, Mark Rutland wrote:
>>> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>
>>>> This patch adds arch specific descriptions about kdump usage on arm64
>>>> to kdump.txt.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>>>> index bc4bd5a..36cf978 100644
>>>> --- a/Documentation/kdump/kdump.txt
>>>> +++ b/Documentation/kdump/kdump.txt
>>>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>>>   a remote system.
>>>>
>>>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>>>> -s390x and arm architectures.
>>>> +s390x, arm and arm64 architectures.
>>>>
>>>>   When the system kernel boots, it reserves a small section of memory for
>>>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>>>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>>>
>>>>       AUTO_ZRELADDR=y
>>>>
>>>> +Dump-capture kernel config options (Arch Dependent, arm64)
>>>> +----------------------------------------------------------
>>>> +
>>>> +1) The maximum memory size on the dump-capture kernel must be limited by
>>>> +   specifying:
>>>> +
>>>> +   mem=X[MG]
>>>> +
>>>> +   where X should be less than or equal to the size in "crashkernel="
>>>> +   boot parameter. Kexec-tools will automatically add this.
>>>
>>>
>>> This is extremely fragile, and will trivially fail when the kernel can
>>> be loaded anywhere (see [1]).
>>
>> As I said before, this restriction also exists on arm, but I understand
>> that recent Ard's patches break it.
>>
>>> We must explicitly describe the set of regions the crash kernel may use
>>> (i.e. we need base and size). NAK in the absence of that.
>>
>> There seem to exist several approaches:
>> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>>      under "memory" node
>> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
>>
>> Power PC takes (a), while this does not work on efi-started kernel
>> because dtb has no "memory" nodes under efi.
>> X86 takes (b). If we take this, we will need to overwrite a weak
>> early_init_dt_add_memory().
>
> X86 takes another way in latest kexec-tools and kexec_file_load, that is
> recreating E820 table and pass it to kexec/kdump kernel, if the entries
> are over E820 limitation then turn to use setup_data list for remain
> entries.

Thanks. I will visit x86 code again.

> I think it is X86 specific. Personally I think device tree property is
> better.

Do you think so?

-Takahiro AKASHI


>
> Thanks
> Dave
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 04/19] arm64: Cleanup SCTLR flags
  2016-01-18 10:12       ` Marc Zyngier
@ 2016-01-19 11:59         ` Dave Martin
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Martin @ 2016-01-19 11:59 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jan 18, 2016 at 10:12:18AM +0000, Marc Zyngier wrote:
> On 15/01/16 20:07, Mark Rutland wrote:

[...]

> > Marc, Christoffer, I note that in SCTLR_EL2_FLAGS we don't set the RES1
> > bits of SCTLR_EL2 (not in head.S el2_setup). Should we perhaps be doing
> > so so as to avoid any future surprises?
> 
> Yes, that's one of the numerous instances of the same problem - I think
> Dave Martin also has some fixes in that area.
> 
> I'll definitely take patches!

Yep, we have a similar problem with CPTR_EL2, patch to follow at some
point.  There are likely other instances...

Cheers
---Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 04/19] arm64: Cleanup SCTLR flags
@ 2016-01-19 11:59         ` Dave Martin
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Martin @ 2016-01-19 11:59 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Mark Rutland, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, linux-arm-kernel, kexec,
	christoffer.dall

On Mon, Jan 18, 2016 at 10:12:18AM +0000, Marc Zyngier wrote:
> On 15/01/16 20:07, Mark Rutland wrote:

[...]

> > Marc, Christoffer, I note that in SCTLR_EL2_FLAGS we don't set the RES1
> > bits of SCTLR_EL2 (not in head.S el2_setup). Should we perhaps be doing
> > so so as to avoid any future surprises?
> 
> Yes, that's one of the numerous instances of the same problem - I think
> Dave Martin also has some fixes in that area.
> 
> I'll definitely take patches!

Yep, we have a similar problem with CPTR_EL2, patch to follow at some
point.  There are likely other instances...

Cheers
---Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19  5:31           ` AKASHI Takahiro
@ 2016-01-19 12:10             ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 12:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 19, 2016 at 02:31:05PM +0900, AKASHI Takahiro wrote:
> On 01/18/2016 08:29 PM, Mark Rutland wrote:
> >On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
> >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>
> >>>>This patch adds arch specific descriptions about kdump usage on arm64
> >>>>to kdump.txt.
> >>>>
> >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>---
> >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>>>index bc4bd5a..36cf978 100644
> >>>>--- a/Documentation/kdump/kdump.txt
> >>>>+++ b/Documentation/kdump/kdump.txt
> >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>>>  a remote system.
> >>>>
> >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>>>-s390x and arm architectures.
> >>>>+s390x, arm and arm64 architectures.
> >>>>
> >>>>  When the system kernel boots, it reserves a small section of memory for
> >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>>>
> >>>>      AUTO_ZRELADDR=y
> >>>>
> >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>>>+----------------------------------------------------------
> >>>>+
> >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>>>+   specifying:
> >>>>+
> >>>>+   mem=X[MG]
> >>>>+
> >>>>+   where X should be less than or equal to the size in "crashkernel="
> >>>>+   boot parameter. Kexec-tools will automatically add this.
> >>>
> >>>
> >>>This is extremely fragile, and will trivially fail when the kernel can
> >>>be loaded anywhere (see [1]).
> >>
> >>As I said before, this restriction also exists on arm, but I understand
> >>that recent Ard's patches break it.
> >>
> >>>We must explicitly describe the set of regions the crash kernel may use
> >>>(i.e. we need base and size). NAK in the absence of that.
> >>
> >>There seem to exist several approaches:
> >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >
> >I'm not opposed to the idea of a DT property, though I think that should
> >live under /chosen.
> 
> In fact, powerpc uses another property, "linux,crashkernel-base(& size)",
> under /chosen in order for the *1st kernel* to export info about a memory
> region for the 2nd(crash dump) kernel to user apps (kexec-tools).

Do you mean that said property is provided _to_ the 1st kernel, or
provided _by_ the first kernel?

> >I see that "linux,usable-memory" exists already, though I'm confused as
> >to exactly what it is for as there is no documentation (neither in the
> >kernel nor in ePAPR).
> 
> For example,
>   memory@0x80000000 {
>     reg = <0x0 0x80000000 0x0 0x80000000>;
>     linux,usable-memory = <0x0 0x8c000000 0x0 0x4000000>;
>   }
> There exists 2GB of memory available on the system, but only the 64MB at
> 0x8c000000 can be used as system RAM. See early_init_dt_scan_memory() in fdt.c.

Sure, except that's the implementation rather than the intended
semantics (which are not defined).

> >It's also painful to alter multiple memory nodes
> >to use that, and I can see that going wrong.
> 
> Yeah, I implemented this feature in my old versions experimentally,
> but didn't like it as we had to touch all the memory nodes.
> 
> >>     under "memory" node
> >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> >
> >I'm not too keen on this, as I think it's fragile, and logically
> >somewhat distinct from what mem= is for (a best effort testing tool).
> 
> I'm not sure whether it is fragile, and contrary to x86, as Dave
> described, I think we will only need a single memmap= on arm64 as
> efi's mem map table is accessible even on the crash kernel.

I just realised I misread this as "mem=", apologies.

It looks like memmap= to force a specific region of memory to be used
may work.

I'd still err on the side of preferring an explicit property in the DT.

> >>Power PC takes (a), while this does not work on efi-started kernel
> >>because dtb has no "memory" nodes under efi.
> >
> >A property under /chosen would work for EFI too.
> >
> >>X86 takes (b). If we take this, we will need to overwrite a weak
> >>early_init_dt_add_memory().
> >>(I thought that this approach was not smart as we have three different
> >>ways to specify memory regions, dtb, efi and this kernel parameter.)
> >
> >I'm not sure that's a big problem. We may be able to make this generic,
> >also.
> >
> >We don't necessarily need a weak add memory function if we can guarantee
> >nothing gets memblock_alloc'd before we carve it out.
> >
> >Something like the nomap stuff Ard put together might be useful here.
> 
> I'm afraid it doesn't work.
> It doesn't matter whether it is linearly mapped or not. We should prevent
> any part of memory regions used by the 1st kernel from being reclaimed
> by memblock_alloc() and others.

Are you certain that nomap memory can be allocated? That sounds like a
major bug.

Nomap memory should act like reserved memory with the additional
property that the kernel must not map it implicitly.

> Or do you mean we can introduce another memblock flag?

That wasn't what I meant, but that would be a potential solution.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 12:10             ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 12:10 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, linux-arm-kernel, kexec,
	christoffer.dall

On Tue, Jan 19, 2016 at 02:31:05PM +0900, AKASHI Takahiro wrote:
> On 01/18/2016 08:29 PM, Mark Rutland wrote:
> >On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
> >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>
> >>>>This patch adds arch specific descriptions about kdump usage on arm64
> >>>>to kdump.txt.
> >>>>
> >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>---
> >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>>>index bc4bd5a..36cf978 100644
> >>>>--- a/Documentation/kdump/kdump.txt
> >>>>+++ b/Documentation/kdump/kdump.txt
> >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>>>  a remote system.
> >>>>
> >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>>>-s390x and arm architectures.
> >>>>+s390x, arm and arm64 architectures.
> >>>>
> >>>>  When the system kernel boots, it reserves a small section of memory for
> >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>>>
> >>>>      AUTO_ZRELADDR=y
> >>>>
> >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>>>+----------------------------------------------------------
> >>>>+
> >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>>>+   specifying:
> >>>>+
> >>>>+   mem=X[MG]
> >>>>+
> >>>>+   where X should be less than or equal to the size in "crashkernel="
> >>>>+   boot parameter. Kexec-tools will automatically add this.
> >>>
> >>>
> >>>This is extremely fragile, and will trivially fail when the kernel can
> >>>be loaded anywhere (see [1]).
> >>
> >>As I said before, this restriction also exists on arm, but I understand
> >>that recent Ard's patches break it.
> >>
> >>>We must explicitly describe the set of regions the crash kernel may use
> >>>(i.e. we need base and size). NAK in the absence of that.
> >>
> >>There seem to exist several approaches:
> >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >
> >I'm not opposed to the idea of a DT property, though I think that should
> >live under /chosen.
> 
> In fact, powerpc uses another property, "linux,crashkernel-base(& size)",
> under /chosen in order for the *1st kernel* to export info about a memory
> region for the 2nd(crash dump) kernel to user apps (kexec-tools).

Do you mean that said property is provided _to_ the 1st kernel, or
provided _by_ the first kernel?

> >I see that "linux,usable-memory" exists already, though I'm confused as
> >to exactly what it is for as there is no documentation (neither in the
> >kernel nor in ePAPR).
> 
> For example,
>   memory@0x80000000 {
>     reg = <0x0 0x80000000 0x0 0x80000000>;
>     linux,usable-memory = <0x0 0x8c000000 0x0 0x4000000>;
>   }
> There exists 2GB of memory available on the system, but only the 64MB at
> 0x8c000000 can be used as system RAM. See early_init_dt_scan_memory() in fdt.c.

Sure, except that's the implementation rather than the intended
semantics (which are not defined).

> >It's also painful to alter multiple memory nodes
> >to use that, and I can see that going wrong.
> 
> Yeah, I implemented this feature in my old versions experimentally,
> but didn't like it as we had to touch all the memory nodes.
> 
> >>     under "memory" node
> >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> >
> >I'm not too keen on this, as I think it's fragile, and logically
> >somewhat distinct from what mem= is for (a best effort testing tool).
> 
> I'm not sure whether it is fragile, and contrary to x86, as Dave
> described, I think we will only need a single memmap= on arm64 as
> efi's mem map table is accessible even on the crash kernel.

I just realised I misread this as "mem=", apologies.

It looks like memmap= to force a specific region of memory to be used
may work.

I'd still err on the side of preferring an explicit property in the DT.

> >>Power PC takes (a), while this does not work on efi-started kernel
> >>because dtb has no "memory" nodes under efi.
> >
> >A property under /chosen would work for EFI too.
> >
> >>X86 takes (b). If we take this, we will need to overwrite a weak
> >>early_init_dt_add_memory().
> >>(I thought that this approach was not smart as we have three different
> >>ways to specify memory regions, dtb, efi and this kernel parameter.)
> >
> >I'm not sure that's a big problem. We may be able to make this generic,
> >also.
> >
> >We don't necessarily need a weak add memory function if we can guarantee
> >nothing gets memblock_alloc'd before we carve it out.
> >
> >Something like the nomap stuff Ard put together might be useful here.
> 
> I'm afraid it doesn't work.
> It doesn't matter whether it is linearly mapped or not. We should prevent
> any part of memory regions used by the 1st kernel from being reclaimed
> by memblock_alloc() and others.

Are you certain that nomap memory can be allocated? That sounds like a
major bug.

Nomap memory should act like reserved memory with the additional
property that the kernel must not map it implicitly.

> Or do you mean we can introduce another memblock flag?

That wasn't what I meant, but that would be a potential solution.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19  1:43         ` Dave Young
@ 2016-01-19 12:17           ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 12:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>
> > >>This patch adds arch specific descriptions about kdump usage on arm64
> > >>to kdump.txt.
> > >>
> > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>---
> > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > >>
> > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > >>index bc4bd5a..36cf978 100644
> > >>--- a/Documentation/kdump/kdump.txt
> > >>+++ b/Documentation/kdump/kdump.txt
> > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > >>  a remote system.
> > >>
> > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > >>-s390x and arm architectures.
> > >>+s390x, arm and arm64 architectures.
> > >>
> > >>  When the system kernel boots, it reserves a small section of memory for
> > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > >>
> > >>      AUTO_ZRELADDR=y
> > >>
> > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > >>+----------------------------------------------------------
> > >>+
> > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > >>+   specifying:
> > >>+
> > >>+   mem=X[MG]
> > >>+
> > >>+   where X should be less than or equal to the size in "crashkernel="
> > >>+   boot parameter. Kexec-tools will automatically add this.
> > >
> > >
> > >This is extremely fragile, and will trivially fail when the kernel can
> > >be loaded anywhere (see [1]).
> > 
> > As I said before, this restriction also exists on arm, but I understand
> > that recent Ard's patches break it.
> > 
> > >We must explicitly describe the set of regions the crash kernel may use
> > >(i.e. we need base and size). NAK in the absence of that.
> > 
> > There seem to exist several approaches:
> > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >     under "memory" node
> > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > 
> > Power PC takes (a), while this does not work on efi-started kernel
> > because dtb has no "memory" nodes under efi.
> > X86 takes (b). If we take this, we will need to overwrite a weak
> > early_init_dt_add_memory().
> 
> X86 takes another way in latest kexec-tools and kexec_file_load, that is
> recreating E820 table and pass it to kexec/kdump kernel, if the entries
> are over E820 limitation then turn to use setup_data list for the
> remaining entries.

This would imply modifying the EFI memory map or the memory nodes, which
I'm not keen on.

I would prefer that they are left _pristine_, and we describe the
restriction on the kdump kernel with additional properties under
/chosen.

That leaves us with more useful information about the environment of the
first kernel, is simpler for userspace (it's resilient to updates to the
UEFI memory map spec, for example), and is simple for the crash kernel.

> I think it is X86 specific. Personally I think device tree property is
> better.

As above, agreed.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 12:17           ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 12:17 UTC (permalink / raw)
  To: Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, linux-arm-kernel, marc.zyngier,
	kexec, christoffer.dall

On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>
> > >>This patch adds arch specific descriptions about kdump usage on arm64
> > >>to kdump.txt.
> > >>
> > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>---
> > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > >>
> > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > >>index bc4bd5a..36cf978 100644
> > >>--- a/Documentation/kdump/kdump.txt
> > >>+++ b/Documentation/kdump/kdump.txt
> > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > >>  a remote system.
> > >>
> > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > >>-s390x and arm architectures.
> > >>+s390x, arm and arm64 architectures.
> > >>
> > >>  When the system kernel boots, it reserves a small section of memory for
> > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > >>
> > >>      AUTO_ZRELADDR=y
> > >>
> > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > >>+----------------------------------------------------------
> > >>+
> > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > >>+   specifying:
> > >>+
> > >>+   mem=X[MG]
> > >>+
> > >>+   where X should be less than or equal to the size in "crashkernel="
> > >>+   boot parameter. Kexec-tools will automatically add this.
> > >
> > >
> > >This is extremely fragile, and will trivially fail when the kernel can
> > >be loaded anywhere (see [1]).
> > 
> > As I said before, this restriction also exists on arm, but I understand
> > that recent Ard's patches break it.
> > 
> > >We must explicitly describe the set of regions the crash kernel may use
> > >(i.e. we need base and size). NAK in the absence of that.
> > 
> > There seem to exist several approaches:
> > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >     under "memory" node
> > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > 
> > Power PC takes (a), while this does not work on efi-started kernel
> > because dtb has no "memory" nodes under efi.
> > X86 takes (b). If we take this, we will need to overwrite a weak
> > early_init_dt_add_memory().
> 
> X86 takes another way in latest kexec-tools and kexec_file_load, that is
> recreating E820 table and pass it to kexec/kdump kernel, if the entries
> are over E820 limitation then turn to use setup_data list for the
> remaining entries.

This would imply modifying the EFI memory map or the memory nodes, which
I'm not keen on.

I would prefer that they are left _pristine_, and we describe the
restriction on the kdump kernel with additional properties under
/chosen.

That leaves us with more useful information about the environment of the
first kernel, is simpler for userspace (it's resilient to updates to the
UEFI memory map spec, for example), and is simple for the crash kernel.

> I think it is X86 specific. Personally I think device tree property is
> better.

As above, agreed.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19  5:35           ` AKASHI Takahiro
@ 2016-01-19 12:28             ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 12:28 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> On 01/19/2016 10:43 AM, Dave Young wrote:
> >On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>
> >>>>This patch adds arch specific descriptions about kdump usage on arm64
> >>>>to kdump.txt.
> >>>>
> >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>---
> >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>>>index bc4bd5a..36cf978 100644
> >>>>--- a/Documentation/kdump/kdump.txt
> >>>>+++ b/Documentation/kdump/kdump.txt
> >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>>>  a remote system.
> >>>>
> >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>>>-s390x and arm architectures.
> >>>>+s390x, arm and arm64 architectures.
> >>>>
> >>>>  When the system kernel boots, it reserves a small section of memory for
> >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>>>
> >>>>      AUTO_ZRELADDR=y
> >>>>
> >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>>>+----------------------------------------------------------
> >>>>+
> >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>>>+   specifying:
> >>>>+
> >>>>+   mem=X[MG]
> >>>>+
> >>>>+   where X should be less than or equal to the size in "crashkernel="
> >>>>+   boot parameter. Kexec-tools will automatically add this.
> >>>
> >>>
> >>>This is extremely fragile, and will trivially fail when the kernel can
> >>>be loaded anywhere (see [1]).
> >>
> >>As I said before, this restriction also exists on arm, but I understand
> >>that recent Ard's patches break it.
> >>
> >>>We must explicitly describe the set of regions the crash kernel may use
> >>>(i.e. we need base and size). NAK in the absence of that.
> >>
> >>There seem to exist several approaches:
> >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >>     under "memory" node
> >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> >>
> >>Power PC takes (a), while this does not work on efi-started kernel
> >>because dtb has no "memory" nodes under efi.
> >>X86 takes (b). If we take this, we will need to overwrite a weak
> >>early_init_dt_add_memory().
> >
> >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >are over E820 limitation then turn to use setup_data list for the
> >remaining entries.
> 
> Thanks. I will visit x86 code again.
> 
> >I think it is X86 specific. Personally I think device tree property is
> >better.
> 
> Do you think so?

I'm not sure it is the best way. For X86 we ran into problems with the
memmap= design; one example is that pci domain X (X>1) needs its pci memory
ranges passed to the kdump kernel. When we passed the reserved ranges in /proc/iomem
to the 2nd kernel we found that the cmdline[] array is not big enough.

Do you think that for arm64 only usable memory needs to be made known to the
kdump kernel? I'm curious about how the arm64 kernel gets the whole memory layout
from the boot loader: via the UEFI memmap?

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 12:28             ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 12:28 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> On 01/19/2016 10:43 AM, Dave Young wrote:
> >On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>
> >>>>This patch adds arch specific descriptions about kdump usage on arm64
> >>>>to kdump.txt.
> >>>>
> >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >>>>---
> >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> >>>>index bc4bd5a..36cf978 100644
> >>>>--- a/Documentation/kdump/kdump.txt
> >>>>+++ b/Documentation/kdump/kdump.txt
> >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> >>>>  a remote system.
> >>>>
> >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> >>>>-s390x and arm architectures.
> >>>>+s390x, arm and arm64 architectures.
> >>>>
> >>>>  When the system kernel boots, it reserves a small section of memory for
> >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> >>>>
> >>>>      AUTO_ZRELADDR=y
> >>>>
> >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> >>>>+----------------------------------------------------------
> >>>>+
> >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> >>>>+   specifying:
> >>>>+
> >>>>+   mem=X[MG]
> >>>>+
> >>>>+   where X should be less than or equal to the size in "crashkernel="
> >>>>+   boot parameter. Kexec-tools will automatically add this.
> >>>
> >>>
> >>>This is extremely fragile, and will trivially fail when the kernel can
> >>>be loaded anywhere (see [1]).
> >>
> >>As I said before, this restriction also exists on arm, but I understand
> >>that recent Ard's patches break it.
> >>
> >>>We must explicitly describe the set of regions the crash kernel may use
> >>>(i.e. we need base and size). NAK in the absence of that.
> >>
> >>There seem to exist several approaches:
> >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> >>     under "memory" node
> >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> >>
> >>Power PC takes (a), while this does not work on efi-started kernel
> >>because dtb has no "memory" nodes under efi.
> >>X86 takes (b). If we take this, we will need to overwrite a weak
> >>early_init_dt_add_memory().
> >
> >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >are over E820 limitation then turn to use setup_data list for the
> >remaining entries.
> 
> Thanks. I will visit x86 code again.
> 
> >I think it is X86 specific. Personally I think device tree property is
> >better.
> 
> Do you think so?

I'm not sure it is the best way. For X86 we ran into problems with the
memmap= design; one example is that pci domain X (X>1) needs its pci memory
ranges passed to the kdump kernel. When we passed the reserved ranges in /proc/iomem
to the 2nd kernel we found that the cmdline[] array is not big enough.

Do you think that for arm64 only usable memory needs to be made known to the
kdump kernel? I'm curious about how the arm64 kernel gets the whole memory layout
from the boot loader: via the UEFI memmap?

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-01-15 19:18 ` Geoff Levand
@ 2016-01-19 12:32   ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 12:32 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/15/16 at 07:18pm, Geoff Levand wrote:
> This series adds the core support for kexec re-boot and kdump on ARM64.  This
> version of the series combines Takahiro's kdump patches with my kexec patches.
> Please consider all patches for inclusion.
> 
> I just tested all the endian combinations of kexec LE->LE, LE->BE, BE->BE,
> BE->LE, and both LE and BE kdump, and all work as expected.
> 
> To load a second stage kernel and execute a kexec re-boot or to work with kdump
> on ARM64 systems a series of patches to kexec-tools [2], which have not yet been
> merged upstream, are needed.
> 
> To examine vmcore (/proc/vmcore), you should use
>   - gdb v7.7 or later
>   - crash v7.1.1 or later
> 
> [1]  https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
> [2]  https://git.kernel.org/cgit/linux/kernel/git/geoff/kexec-tools.git
> 

Geoff, another question about the kexec-tools part: can the kexec-tools code
be written in the kernel? We have the infrastructure for kexec_file_load.

In the future if we have to support loading a signed kernel then we still
need to do that, but we may have to support both the kexec-tools and the
in kernel loader for compatibility purpose. It will be hard to maintain.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-19 12:32   ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 12:32 UTC (permalink / raw)
  To: Geoff Levand
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/15/16 at 07:18pm, Geoff Levand wrote:
> This series adds the core support for kexec re-boot and kdump on ARM64.  This
> version of the series combines Takahiro's kdump patches with my kexec patches.
> Please consider all patches for inclusion.
> 
> I just tested all the endian combinations of kexec LE->LE, LE->BE, BE->BE,
> BE->LE, and both LE and BE kdump, and all work as expected.
> 
> To load a second stage kernel and execute a kexec re-boot or to work with kdump
> on ARM64 systems a series of patches to kexec-tools [2], which have not yet been
> merged upstream, are needed.
> 
> To examine vmcore (/proc/vmcore), you should use
>   - gdb v7.7 or later
>   - crash v7.1.1 or later
> 
> [1]  https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
> [2]  https://git.kernel.org/cgit/linux/kernel/git/geoff/kexec-tools.git
> 

Geoff, another question about the kexec-tools part: can the kexec-tools code
be written in the kernel? We have the infrastructure for kexec_file_load.

In the future if we have to support loading a signed kernel then we still
need to do that, but we may have to support both the kexec-tools and the
in kernel loader for compatibility purpose. It will be hard to maintain.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 12:28             ` Dave Young
@ 2016-01-19 12:51               ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 12:51 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > On 01/19/2016 10:43 AM, Dave Young wrote:
> > >On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>>>
> > >>>>This patch adds arch specific descriptions about kdump usage on arm64
> > >>>>to kdump.txt.
> > >>>>
> > >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>>>---
> > >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> > >>>>
> > >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > >>>>index bc4bd5a..36cf978 100644
> > >>>>--- a/Documentation/kdump/kdump.txt
> > >>>>+++ b/Documentation/kdump/kdump.txt
> > >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > >>>>  a remote system.
> > >>>>
> > >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > >>>>-s390x and arm architectures.
> > >>>>+s390x, arm and arm64 architectures.
> > >>>>
> > >>>>  When the system kernel boots, it reserves a small section of memory for
> > >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > >>>>
> > >>>>      AUTO_ZRELADDR=y
> > >>>>
> > >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> > >>>>+----------------------------------------------------------
> > >>>>+
> > >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> > >>>>+   specifying:
> > >>>>+
> > >>>>+   mem=X[MG]
> > >>>>+
> > >>>>+   where X should be less than or equal to the size in "crashkernel="
> > >>>>+   boot parameter. Kexec-tools will automatically add this.
> > >>>
> > >>>
> > >>>This is extremely fragile, and will trivially fail when the kernel can
> > >>>be loaded anywhere (see [1]).
> > >>
> > >>As I said before, this restriction also exists on arm, but I understand
> > >>that recent Ard's patches break it.
> > >>
> > >>>We must explicitly describe the set of regions the crash kernel may use
> > >>>(i.e. we need base and size). NAK in the absence of that.
> > >>
> > >>There seem to exist several approaches:
> > >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > >>     under "memory" node
> > >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > >>
> > >>Power PC takes (a), while this does not work on efi-started kernel
> > >>because dtb has no "memory" nodes under efi.
> > >>X86 takes (b). If we take this, we will need to overwrite a weak
> > >>early_init_dt_add_memory().
> > >
> > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > >are over E820 limitation then turn to use setup_data list for remain
> > >entries.
> > 
> > Thanks. I will visit x86 code again.
> > 
> > >I think it is X86 specific. Personally I think device tree property is
> > >better.
> > 
> > Do you think so?
> 
> I'm not sure it is the best way. For X86 we run into problem with
> memmap= design, one example is pci domain X (X>1) need the pci memory
> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> to 2nd kernel we find that cmdline[] array is not big enough.

I'm not sure how PCI ranges relate to the memory map used for normal
memory (i.e. RAM), though I'm probably missing some caveat with the way
ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?

If the kernel got the rest of its system topology from DT, the PCI
regions would be described there.

> Do you think for arm64 only usable memory is necessary to let kdump kernel
> know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> via UEFI memmap?

When booted via EFI, we use the EFI memory map. The EFI stub handles
acquiring the relevant information and passing that to the first kernel
in the DTB (see Documentation/arm/uefi.txt).

A kexec'd kernel should simply inherit that. So long as the DTB and/or
UEFI tables in memory are the same, it would be the same as a cold boot.

In the !EFI case, we use the memory nodes in the DTB. Only in this case
could usable-memory properties in memory nodes make sense. I'd prefer a
uniform property under /chosen for both cases.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 12:51               ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 12:51 UTC (permalink / raw)
  To: Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > On 01/19/2016 10:43 AM, Dave Young wrote:
> > >On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>>>
> > >>>>This patch adds arch specific descriptions about kdump usage on arm64
> > >>>>to kdump.txt.
> > >>>>
> > >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > >>>>---
> > >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> > >>>>
> > >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > >>>>index bc4bd5a..36cf978 100644
> > >>>>--- a/Documentation/kdump/kdump.txt
> > >>>>+++ b/Documentation/kdump/kdump.txt
> > >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > >>>>  a remote system.
> > >>>>
> > >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > >>>>-s390x and arm architectures.
> > >>>>+s390x, arm and arm64 architectures.
> > >>>>
> > >>>>  When the system kernel boots, it reserves a small section of memory for
> > >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > >>>>
> > >>>>      AUTO_ZRELADDR=y
> > >>>>
> > >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> > >>>>+----------------------------------------------------------
> > >>>>+
> > >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> > >>>>+   specifying:
> > >>>>+
> > >>>>+   mem=X[MG]
> > >>>>+
> > >>>>+   where X should be less than or equal to the size in "crashkernel="
> > >>>>+   boot parameter. Kexec-tools will automatically add this.
> > >>>
> > >>>
> > >>>This is extremely fragile, and will trivially fail when the kernel can
> > >>>be loaded anywhere (see [1]).
> > >>
> > >>As I said before, this restriction also exists on arm, but I understand
> > >>that recent Ard's patches break it.
> > >>
> > >>>We must explicitly describe the set of regions the crash kernel may use
> > >>>(i.e. we need base and size). NAK in the absence of that.
> > >>
> > >>There seem to exist several approaches:
> > >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > >>     under "memory" node
> > >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > >>
> > >>Power PC takes (a), while this does not work on efi-started kernel
> > >>because dtb has no "memory" nodes under efi.
> > >>X86 takes (b). If we take this, we will need to overwrite a weak
> > >>early_init_dt_add_memory().
> > >
> > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > >are over E820 limitation then turn to use setup_data list for remain
> > >entries.
> > 
> > Thanks. I will visit x86 code again.
> > 
> > >I think it is X86 specific. Personally I think device tree property is
> > >better.
> > 
> > Do you think so?
> 
> I'm not sure it is the best way. For X86 we run into problem with
> memmap= design, one example is pci domain X (X>1) need the pci memory
> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> to 2nd kernel we find that cmdline[] array is not big enough.

I'm not sure how PCI ranges relate to the memory map used for normal
memory (i.e. RAM), though I'm probably missing some caveat with the way
ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?

If the kernel got the rest of its system topology from DT, the PCI
regions would be described there.

> Do you think for arm64 only usable memory is necessary to let kdump kernel
> know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> via UEFI memmap?

When booted via EFI, we use the EFI memory map. The EFI stub handles
acquiring the relevant information and passing that to the first kernel
in the DTB (see Documentation/arm/uefi.txt).

A kexec'd kernel should simply inherit that. So long as the DTB and/or
UEFI tables in memory are the same, it would be the same as a cold boot.

In the !EFI case, we use the memory nodes in the DTB. Only in this case
could usable-memory properties in memory nodes make sense. I'd prefer a
uniform property under /chosen for both cases.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 12:51               ` Mark Rutland
@ 2016-01-19 13:45                 ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 13:45 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 12:51pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > >On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>>>
> > > >>>>This patch adds arch specific descriptions about kdump usage on arm64
> > > >>>>to kdump.txt.
> > > >>>>
> > > >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>>>---
> > > >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > >>>>
> > > >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > >>>>index bc4bd5a..36cf978 100644
> > > >>>>--- a/Documentation/kdump/kdump.txt
> > > >>>>+++ b/Documentation/kdump/kdump.txt
> > > >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > >>>>  a remote system.
> > > >>>>
> > > >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > >>>>-s390x and arm architectures.
> > > >>>>+s390x, arm and arm64 architectures.
> > > >>>>
> > > >>>>  When the system kernel boots, it reserves a small section of memory for
> > > >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > >>>>
> > > >>>>      AUTO_ZRELADDR=y
> > > >>>>
> > > >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > >>>>+----------------------------------------------------------
> > > >>>>+
> > > >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > >>>>+   specifying:
> > > >>>>+
> > > >>>>+   mem=X[MG]
> > > >>>>+
> > > >>>>+   where X should be less than or equal to the size in "crashkernel="
> > > >>>>+   boot parameter. Kexec-tools will automatically add this.
> > > >>>
> > > >>>
> > > >>>This is extremely fragile, and will trivially fail when the kernel can
> > > >>>be loaded anywhere (see [1]).
> > > >>
> > > >>As I said before, this restriction also exists on arm, but I understand
> > > >>that recent Ard's patches break it.
> > > >>
> > > >>>We must explicitly describe the set of regions the crash kernel may use
> > > >>>(i.e. we need base and size). NAK in the absence of that.
> > > >>
> > > >>There seem to exist several approaches:
> > > >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > > >>     under "memory" node
> > > >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > >>
> > > >>Power PC takes (a), while this does not work on efi-started kernel
> > > >>because dtb has no "memory" nodes under efi.
> > > >>X86 takes (b). If we take this, we will need to overwrite a weak
> > > >>early_init_dt_add_memory().
> > > >
> > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > >are over E820 limitation then turn to use setup_data list for remain
> > > >entries.
> > > 
> > > Thanks. I will visit x86 code again.
> > > 
> > > >I think it is X86 specific. Personally I think device tree property is
> > > >better.
> > > 
> > > Do you think so?
> > 
> > I'm not sure it is the best way. For X86 we run into problem with
> > memmap= design, one example is pci domain X (X>1) need the pci memory
> > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > to 2nd kernel we find that cmdline[] array is not big enough.
> 
> I'm not sure how PCI ranges relate to the memory map used for normal
> memory (i.e. RAM), though I'm probably missing some caveat with the way
> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?

Here is the old patch which was rejected in kexec-tools:
http://lists.infradead.org/pipermail/kexec/2013-February/007924.html

> 
> If the kernel got the rest of its system topology from DT, the PCI
> regions would be described there.

Yes, if kdump kernel use same DT as 1st kernel.

> 
> > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > via UEFI memmap?
> 
> When booted via EFI, we use the EFI memory map. The EFI stub handles
> acquiring the relevant information and passing that to the first kernel
> in the DTB (see Documentation/arm/uefi.txt).

Ok, thanks for the pointer. So in the DT we just have UEFI memmap information
instead of memory node details.

> 
> A kexec'd kernel should simply inherit that. So long as the DTB and/or
> UEFI tables in memory are the same, it would be the same as a cold boot.

For kexec all memory ranges are the same; for kdump we need to use the original
range reserved with crashkernel= as usable memory, and all other original usable
ranges are not usable anymore.

Is it possible to modify uefi memmap for kdump case?

> 
> In the !EFI case, we use the memory nodes in the DTB. Only in this case
> could usable-memory properties in memory nodes make sense. I'd prefer a
> uniform property under /chosen for both cases.

We still use the same DTB; do we need to modify the DT and update the usable and
unusable nodes for kdump?

> Thanks,
> Mark.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 13:45                 ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 13:45 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On 01/19/16 at 12:51pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > >On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > >>On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > >>>On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > >>>>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>>>
> > > >>>>This patch adds arch specific descriptions about kdump usage on arm64
> > > >>>>to kdump.txt.
> > > >>>>
> > > >>>>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>>>---
> > > >>>>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > >>>>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > >>>>
> > > >>>>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > >>>>index bc4bd5a..36cf978 100644
> > > >>>>--- a/Documentation/kdump/kdump.txt
> > > >>>>+++ b/Documentation/kdump/kdump.txt
> > > >>>>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > >>>>  a remote system.
> > > >>>>
> > > >>>>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > >>>>-s390x and arm architectures.
> > > >>>>+s390x, arm and arm64 architectures.
> > > >>>>
> > > >>>>  When the system kernel boots, it reserves a small section of memory for
> > > >>>>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > >>>>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > >>>>
> > > >>>>      AUTO_ZRELADDR=y
> > > >>>>
> > > >>>>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > >>>>+----------------------------------------------------------
> > > >>>>+
> > > >>>>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > >>>>+   specifying:
> > > >>>>+
> > > >>>>+   mem=X[MG]
> > > >>>>+
> > > >>>>+   where X should be less than or equal to the size in "crashkernel="
> > > >>>>+   boot parameter. Kexec-tools will automatically add this.
> > > >>>
> > > >>>
> > > >>>This is extremely fragile, and will trivially fail when the kernel can
> > > >>>be loaded anywhere (see [1]).
> > > >>
> > > >>As I said before, this restriction also exists on arm, but I understand
> > > >>that recent Ard's patches break it.
> > > >>
> > > >>>We must explicitly describe the set of regions the crash kernel may use
> > > >>>(i.e. we need base and size). NAK in the absence of that.
> > > >>
> > > >>There seem to exist several approaches:
> > > >>(a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > > >>     under "memory" node
> > > >>(b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > >>
> > > >>Power PC takes (a), while this does not work on efi-started kernel
> > > >>because dtb has no "memory" nodes under efi.
> > > >>X86 takes (b). If we take this, we will need to overwrite a weak
> > > >>early_init_dt_add_memory().
> > > >
> > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > >are over E820 limitation then turn to use setup_data list for remain
> > > >entries.
> > > 
> > > Thanks. I will visit x86 code again.
> > > 
> > > >I think it is X86 specific. Personally I think device tree property is
> > > >better.
> > > 
> > > Do you think so?
> > 
> > I'm not sure it is the best way. For X86 we run into problem with
> > memmap= design, one example is pci domain X (X>1) need the pci memory
> > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > to 2nd kernel we find that cmdline[] array is not big enough.
> 
> I'm not sure how PCI ranges relate to the memory map used for normal
> memory (i.e. RAM), though I'm probably missing some caveat with the way
> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?

Here is the old patch which was rejected in kexec-tools:
http://lists.infradead.org/pipermail/kexec/2013-February/007924.html

> 
> If the kernel got the rest of its system topology from DT, the PCI
> regions would be described there.

Yes, if kdump kernel use same DT as 1st kernel.

> 
> > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > via UEFI memmap?
> 
> When booted via EFI, we use the EFI memory map. The EFI stub handles
> acquiring the relevant information and passing that to the first kernel
> in the DTB (see Documentation/arm/uefi.txt).

Ok, thanks for the pointer. So in the DT we just have UEFI memmap information
instead of memory node details.

> 
> A kexec'd kernel should simply inherit that. So long as the DTB and/or
> UEFI tables in memory are the same, it would be the same as a cold boot.

For kexec all memory ranges are the same; for kdump we need to use the original
range reserved with crashkernel= as usable memory, and all other original usable
ranges are not usable anymore.

Is it possible to modify uefi memmap for kdump case?

> 
> In the !EFI case, we use the memory nodes in the DTB. Only in this case
> could usable-memory properties in memory nodes make sense. I'd prefer a
> uniform property under /chosen for both cases.

We still use the same DTB; do we need to modify the DT and update the usable and
unusable nodes for kdump?

> Thanks,
> Mark.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 12:17           ` Mark Rutland
@ 2016-01-19 13:52             ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 13:52 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 12:17pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> > On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>
> > > >>This patch adds arch specific descriptions about kdump usage on arm64
> > > >>to kdump.txt.
> > > >>
> > > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>---
> > > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > >>
> > > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > >>index bc4bd5a..36cf978 100644
> > > >>--- a/Documentation/kdump/kdump.txt
> > > >>+++ b/Documentation/kdump/kdump.txt
> > > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > >>  a remote system.
> > > >>
> > > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > >>-s390x and arm architectures.
> > > >>+s390x, arm and arm64 architectures.
> > > >>
> > > >>  When the system kernel boots, it reserves a small section of memory for
> > > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > >>
> > > >>      AUTO_ZRELADDR=y
> > > >>
> > > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > >>+----------------------------------------------------------
> > > >>+
> > > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > >>+   specifying:
> > > >>+
> > > >>+   mem=X[MG]
> > > >>+
> > > >>+   where X should be less than or equal to the size in "crashkernel="
> > > >>+   boot parameter. Kexec-tools will automatically add this.
> > > >
> > > >
> > > >This is extremely fragile, and will trivially fail when the kernel can
> > > >be loaded anywhere (see [1]).
> > > 
> > > As I said before, this restriction also exists on arm, but I understand
> > > that recent Ard's patches break it.
> > > 
> > > >We must explicitly describe the set of regions the crash kernel may use
> > > >(i.e. we need base and size). NAK in the absence of that.
> > > 
> > > There seem to exist several approaches:
> > > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > >     under "memory" node
> > > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > 
> > > Power PC takes (a), while this does not work on efi-started kernel
> > > because dtb has no "memory" nodes under efi.
> > > X86 takes (b). If we take this, we will need to overwrite a weak
> > > early_init_dt_add_memory().
> > 
> > X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > are over E820 limitation then turn to use setup_data list for remain
> > entries.
> 
> This would imply modifying the EFI memory map or the memory nodes, which
> I'm not keen on.
> 
> I would prefer that they are left _pristine_, and we describe the
> restriction on the kdump kernel with additional properties under
> /chosen.
> 
> That leaves us with more useful information about the environment of the
> first kernel, is simpler for userspace (it's resilient to updates to the
> UEFI memory map spec, for example), and is simple for the crash kernel.

In theory kexec, as the boot loader, should prepare a correct EFI memmap and pass
it to the kernel, but as you said, yes, it will increase complexity. We need to
balance them.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 13:52             ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-19 13:52 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, linux-arm-kernel, marc.zyngier,
	kexec, christoffer.dall

On 01/19/16 at 12:17pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> > On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>
> > > >>This patch adds arch specific descriptions about kdump usage on arm64
> > > >>to kdump.txt.
> > > >>
> > > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > >>---
> > > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > >>
> > > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > >>index bc4bd5a..36cf978 100644
> > > >>--- a/Documentation/kdump/kdump.txt
> > > >>+++ b/Documentation/kdump/kdump.txt
> > > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > >>  a remote system.
> > > >>
> > > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > >>-s390x and arm architectures.
> > > >>+s390x, arm and arm64 architectures.
> > > >>
> > > >>  When the system kernel boots, it reserves a small section of memory for
> > > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > >>
> > > >>      AUTO_ZRELADDR=y
> > > >>
> > > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > >>+----------------------------------------------------------
> > > >>+
> > > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > >>+   specifying:
> > > >>+
> > > >>+   mem=X[MG]
> > > >>+
> > > >>+   where X should be less than or equal to the size in "crashkernel="
> > > >>+   boot parameter. Kexec-tools will automatically add this.
> > > >
> > > >
> > > >This is extremely fragile, and will trivially fail when the kernel can
> > > >be loaded anywhere (see [1]).
> > > 
> > > As I said before, this restriction also exists on arm, but I understand
> > > that recent Ard's patches break it.
> > > 
> > > >We must explicitly describe the set of regions the crash kernel may use
> > > >(i.e. we need base and size). NAK in the absence of that.
> > > 
> > > There seem to exist several approaches:
> > > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > >     under "memory" node
> > > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > 
> > > Power PC takes (a), while this does not work on efi-started kernel
> > > because dtb has no "memory" nodes under efi.
> > > X86 takes (b). If we take this, we will need to overwrite a weak
> > > early_init_dt_add_memory().
> > 
> > X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > are over E820 limitation then turn to use setup_data list for remain
> > entries.
> 
> This would imply modifying the EFI memory map or the memory nodes, which
> I'm not keen on.
> 
> I would prefer that they are left _pristine_, and we describe the
> restriction on the kdump kernel with additional properties under
> /chosen.
> 
> That leaves us with more useful information about the environment of the
> first kernel, is simpler for userspace (it's resilient to updates to the
> UEFI memory map spec, for example), and is simple for the crash kernel.

In theory kexec, as the boot loader, should prepare a correct EFI memmap and pass
it to the kernel, but as you said, yes, it will increase complexity. We need to
balance them.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 13:45                 ` Dave Young
@ 2016-01-19 14:01                   ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 14:01 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > >entries.
> > > > 
> > > > Thanks. I will visit x86 code again.
> > > > 
> > > > >I think it is X86 specific. Personally I think device tree property is
> > > > >better.
> > > > 
> > > > Do you think so?
> > > 
> > > I'm not sure it is the best way. For X86 we run into problem with
> > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > to 2nd kernel we find that cmdline[] array is not big enough.
> > 
> > I'm not sure how PCI ranges relate to the memory map used for normal
> > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> 
> Here is the old patch which was rejected in kexec-tools:
> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> 
> > 
> > If the kernel got the rest of its system topology from DT, the PCI
> > regions would be described there.
> 
> Yes, if kdump kernel use same DT as 1st kernel.

Other than for testing purposes, I don't see why you'd pass the kdump
kernel a DTB inconsistent with the one the 1st kernel was passed (other
than some properties under /chosen).

We added /sys/firmware/fdt specifically to allow the kexec tools to get
the exact DTB the first kernel used. There's no reason for tools to have
to make something up.

> > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > via UEFI memmap?
> > 
> > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > acquring the relevant information and passing that to the first kernel
> > in the DTB (see Documentation/arm/uefi.txt).
> 
> Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> instead of memory nodes details.. 

When booted via EFI, yes.

For NUMA topology in !ACPI kernels, we might need to also retain and
parse memory nodes, but only for topology information. The kernel would
still only use memory as described by the EFI memory map.

There's a horrible edge case I've spotted if performing a chain of
cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
respect the EFI memory map so as to avoid corrupting it for the
subsequent LE kernel. Other than this I believe everything should just
work.

> > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > UEFI tables in memory are the same, it would be the same as a cold boot.
> 
> For kexec all memory ranges are same, for kdump we need use original reserved
> range with crashkernel= as usable memory and all other orignal usable ranges
> are not usable anymore. 

Sure. This is what I believe we should expose with an additional
property under /chosen, while keeping everything else pristine.

The crash kernel can then limit itself to that region, while it would
have the information of the full memory map (which it could log and/or
use to drive other dumping).

> Is it possible to modify uefi memmap for kdump case?

Technically it would be possible, however I don't think it's necessary,
and I think it would be disadvantageous to do so.

Describing the range(s) the crash kernel can use in separate properties
under /chosen has a number of advantages.

> > In the !EFI case, we use the memory nodes in the DTB. Only in this case
> > could usable-memory properties in memory nodes make sense. I'd prefer a
> > uniform property under /chosen for both cases.
> 
> We stil use same DTB, need to modify the DT and update the usable and unusable
> nodes for kdump?

We'd have a (slightly) modified DTB that contained additional properties
describing the range(s) reserved for use by the crash kernel.

Other than those properties under /chosen (e.g. the command line, initrd
pointers if any), it would be the original DTB.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 14:01                   ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 14:01 UTC (permalink / raw)
  To: Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > >entries.
> > > > 
> > > > Thanks. I will visit x86 code again.
> > > > 
> > > > >I think it is X86 specific. Personally I think device tree property is
> > > > >better.
> > > > 
> > > > Do you think so?
> > > 
> > > I'm not sure it is the best way. For X86 we run into problem with
> > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > to 2nd kernel we find that cmdline[] array is not big enough.
> > 
> > I'm not sure how PCI ranges relate to the memory map used for normal
> > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> 
> Here is the old patch which was rejected in kexec-tools:
> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> 
> > 
> > If the kernel got the rest of its system topology from DT, the PCI
> > regions would be described there.
> 
> Yes, if kdump kernel use same DT as 1st kernel.

Other than for testing purposes, I don't see why you'd pass the kdump
kernel a DTB inconsistent with the one the 1st kernel was passed (other
than some properties under /chosen).

We added /sys/firmware/fdt specifically to allow the kexec tools to get
the exact DTB the first kernel used. There's no reason for tools to have
to make something up.

> > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > via UEFI memmap?
> > 
> > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > acquring the relevant information and passing that to the first kernel
> > in the DTB (see Documentation/arm/uefi.txt).
> 
> Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> instead of memory nodes details.. 

When booted via EFI, yes.

For NUMA topology in !ACPI kernels, we might need to also retain and
parse memory nodes, but only for topology information. The kernel would
still only use memory as described by the EFI memory map.

There's a horrible edge case I've spotted if performing a chain of
cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
respect the EFI memory map so as to avoid corrupting it for the
subsequent LE kernel. Other than this I believe everything should just
work.

> > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > UEFI tables in memory are the same, it would be the same as a cold boot.
> 
> For kexec all memory ranges are same, for kdump we need use original reserved
> range with crashkernel= as usable memory and all other orignal usable ranges
> are not usable anymore. 

Sure. This is what I believe we should expose with an additional
property under /chosen, while keeping everything else pristine.

The crash kernel can then limit itself to that region, while it would
have the information of the full memory map (which it could log and/or
use to drive other dumping).

> Is it possible to modify uefi memmap for kdump case?

Technically it would be possible, however I don't think it's necessary,
and I think it would be disadvantageous to do so.

Describing the range(s) the crash kernel can use in separate properties
under /chosen has a number of advantages.

> > In the !EFI case, we use the memory nodes in the DTB. Only in this case
> > could usable-memory properties in memory nodes make sense. I'd prefer a
> > uniform property under /chosen for both cases.
> 
> We stil use same DTB, need to modify the DT and update the usable and unusable
> nodes for kdump?

We'd have a (slightly) modified DTB that contained additional properties
describing the range(s) reserved for use by the crash kernel.

Other than those properties under /chosen (e.g. the command line, initrd
pointers if any), it would be the original DTB.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 13:52             ` Dave Young
@ 2016-01-19 14:05               ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 14:05 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 19, 2016 at 09:52:33PM +0800, Dave Young wrote:
> On 01/19/16 at 12:17pm, Mark Rutland wrote:
> > On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> > > On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > >>
> > > > >>This patch adds arch specific descriptions about kdump usage on arm64
> > > > >>to kdump.txt.
> > > > >>
> > > > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > >>---
> > > > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > > >>
> > > > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > > >>index bc4bd5a..36cf978 100644
> > > > >>--- a/Documentation/kdump/kdump.txt
> > > > >>+++ b/Documentation/kdump/kdump.txt
> > > > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > > >>  a remote system.
> > > > >>
> > > > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > > >>-s390x and arm architectures.
> > > > >>+s390x, arm and arm64 architectures.
> > > > >>
> > > > >>  When the system kernel boots, it reserves a small section of memory for
> > > > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > > >>
> > > > >>      AUTO_ZRELADDR=y
> > > > >>
> > > > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > > >>+----------------------------------------------------------
> > > > >>+
> > > > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > > >>+   specifying:
> > > > >>+
> > > > >>+   mem=X[MG]
> > > > >>+
> > > > >>+   where X should be less than or equal to the size in "crashkernel="
> > > > >>+   boot parameter. Kexec-tools will automatically add this.
> > > > >
> > > > >
> > > > >This is extremely fragile, and will trivially fail when the kernel can
> > > > >be loaded anywhere (see [1]).
> > > > 
> > > > As I said before, this restriction also exists on arm, but I understand
> > > > that recent Ard's patches break it.
> > > > 
> > > > >We must explicitly describe the set of regions the crash kernel may use
> > > > >(i.e. we need base and size). NAK in the absence of that.
> > > > 
> > > > There seem to exist several approaches:
> > > > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > > >     under "memory" node
> > > > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > > 
> > > > Power PC takes (a), while this does not work on efi-started kernel
> > > > because dtb has no "memory" nodes under efi.
> > > > X86 takes (b). If we take this, we will need to overwrite a weak
> > > > early_init_dt_add_memory().
> > > 
> > > X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > are over E820 limitation then turn to use setup_data list for remain
> > > entries.
> > 
> > This would imply modifying the EFI memory map or the memory nodes, which
> > I'm not keen on.
> > 
> > I would prefer that they are left _pristine_, and we describe the
> > restriction on the kdump kernel with additional properties under
> > /chosen.
> > 
> > That leaves us with more useful information about the environment of the
> > first kernel, is simpler for userspace (it's resilient to updates to the
> > UEFI memory map spec, for example), and is simple for the crash kernel.
> 
> In theory kexec as boot loader should prepare correct efi memmap and pass
> to kernel, but as you said yes it will increase complexity. We need banlance
> them.

I'd argue that the "correct efi memmap" is what we were given by the
firmware initially -- none of that information is any less true.

For kdump all we need to ensure is that the kdump kernel only uses the
memory that was specially reserved for it by the first kernel. The
simplest way of doing that is to tell the kdump kernel which specific
region(s) of memory were reserved for it, leaving the EFI memory map
alone.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-19 14:05               ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-19 14:05 UTC (permalink / raw)
  To: Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, linux-arm-kernel, marc.zyngier,
	kexec, christoffer.dall

On Tue, Jan 19, 2016 at 09:52:33PM +0800, Dave Young wrote:
> On 01/19/16 at 12:17pm, Mark Rutland wrote:
> > On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> > > On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > >>
> > > > >>This patch adds arch specific descriptions about kdump usage on arm64
> > > > >>to kdump.txt.
> > > > >>
> > > > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > >>---
> > > > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > > >>
> > > > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > > >>index bc4bd5a..36cf978 100644
> > > > >>--- a/Documentation/kdump/kdump.txt
> > > > >>+++ b/Documentation/kdump/kdump.txt
> > > > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > > >>  a remote system.
> > > > >>
> > > > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > > >>-s390x and arm architectures.
> > > > >>+s390x, arm and arm64 architectures.
> > > > >>
> > > > >>  When the system kernel boots, it reserves a small section of memory for
> > > > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > > >>
> > > > >>      AUTO_ZRELADDR=y
> > > > >>
> > > > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > > >>+----------------------------------------------------------
> > > > >>+
> > > > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > > >>+   specifying:
> > > > >>+
> > > > >>+   mem=X[MG]
> > > > >>+
> > > > >>+   where X should be less than or equal to the size in "crashkernel="
> > > > >>+   boot parameter. Kexec-tools will automatically add this.
> > > > >
> > > > >
> > > > >This is extremely fragile, and will trivially fail when the kernel can
> > > > >be loaded anywhere (see [1]).
> > > > 
> > > > As I said before, this restriction also exists on arm, but I understand
> > > > that recent Ard's patches break it.
> > > > 
> > > > >We must explicitly describe the set of regions the crash kernel may use
> > > > >(i.e. we need base and size). NAK in the absence of that.
> > > > 
> > > > There seem to exist several approaches:
> > > > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > > >     under "memory" node
> > > > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > > 
> > > > Power PC takes (a), while this does not work on efi-started kernel
> > > > because dtb has no "memory" nodes under efi.
> > > > X86 takes (b). If we take this, we will need to overwrite a weak
> > > > early_init_dt_add_memory().
> > > 
> > > X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > are over E820 limitation then turn to use setup_data list for remain
> > > entries.
> > 
> > This would imply modifying the EFI memory map or the memory nodes, which
> > I'm not keen on.
> > 
> > I would prefer that they are left _pristine_, and we describe the
> > restriction on the kdump kernel with additional properties under
> > /chosen.
> > 
> > That leaves us with more useful information about the environment of the
> > first kernel, is simpler for userspace (it's resilient to updates to the
> > UEFI memory map spec, for example), and is simple for the crash kernel.
> 
> In theory kexec as boot loader should prepare correct efi memmap and pass
> to kernel, but as you said yes it will increase complexity. We need banlance
> them.

I'd argue that the "correct efi memmap" is what we were given by the
firmware initially -- none of that information is any less true.

For kdump all we need to ensure is that the kdump kernel only uses the
memory that was specially reserved for it by the first kernel. The
simplest way of doing that is to tell the kdump kernel which specific
region(s) of memory were reserved for it, leaving the EFI memory map
alone.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-01-19 12:32   ` Dave Young
@ 2016-01-20  0:15     ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-20  0:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> Geoff, another question about kexec-tools part is, can the kexec
> -tools code
> been written in kernel? We have the infrastructure for kexec_file_load.

I see no technical reason why the arm64 kernel cannot support
kexec_file_load.

-Geoff

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-20  0:15     ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-20  0:15 UTC (permalink / raw)
  To: Dave Young
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> Geoff, another question about kexec-tools part is, can the kexec
> -tools code
> been written in kernel? We have the infrastructure for kexec_file_load.

I see no technical reason why the arm64 kernel cannot support
kexec_file_load.

-Geoff


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 14:01                   ` Mark Rutland
@ 2016-01-20  2:49                     ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  2:49 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 02:01pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> > On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > > >entries.
> > > > > 
> > > > > Thanks. I will visit x86 code again.
> > > > > 
> > > > > >I think it is X86 specific. Personally I think device tree property is
> > > > > >better.
> > > > > 
> > > > > Do you think so?
> > > > 
> > > > I'm not sure it is the best way. For X86 we run into problem with
> > > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > > to 2nd kernel we find that cmdline[] array is not big enough.
> > > 
> > > I'm not sure how PCI ranges relate to the memory map used for normal
> > > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> > 
> > Here is the old patch which was rejected in kexec-tools:
> > http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> > 
> > > 
> > > If the kernel got the rest of its system topology from DT, the PCI
> > > regions would be described there.
> > 
> > Yes, if kdump kernel use same DT as 1st kernel.
> 
> Other than for testing purposes, I don't see why you'd pass the kdump
> kernel a DTB inconsistent with that the 1st kernel was passsed (other
> than some proerties under /chosen).
> 
> We added /sys/firmware/fdt specifically to allow the kexec tools to get
> the exact DTB the first kernel used. There's no reason for tools to have
> to make something up.

Agreed, but kexec-tools has an option to pass in any dtb file. Who knows
how people will use it, unless we drop the option and use /sys/firmware/fdt
unconditionally.

If we choose to implement only kexec_file_load in the kernel, the interfaces
provided are kernel, initrd and cmdline. We can always use the same dtb.
 
> 
> > > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > > via UEFI memmap?
> > > 
> > > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > > acquring the relevant information and passing that to the first kernel
> > > in the DTB (see Documentation/arm/uefi.txt).
> > 
> > Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> > instead of memory nodes details.. 
> 
> When booted via EFI, yes.
> 
> For NUMA topology in !ACPI kernels, we might need to also retain and
> parse memory nodes, but only for toplogy information. The kernel would
> still only use memory as described by the EFI memory map.
> 
> There's a horrible edge case I've spotted if performing a chain of
> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> respect the EFI memory map so as to avoid corrupting it for the
> subsequent LE kernel. Other than this I believe everything should just
> work.

Firmware does not know the kernel's endianness; the kernel should respect the
firmware maps and adapt to them. It sounds like a generic issue, not specific to kexec.

> 
> > > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > > UEFI tables in memory are the same, it would be the same as a cold boot.
> > 
> > For kexec all memory ranges are same, for kdump we need use original reserved
> > range with crashkernel= as usable memory and all other orignal usable ranges
> > are not usable anymore. 
> 
> Sure. This is what I believe we should expose with an additional
> property under /chosen, while keeping everything else pristine.
> 
> The crash kernel can then limit itself to that region, while it would
> have the information of the full memory map (which it could log and/or
> use to drive other dumping).

In this way the kernel has to be aware that it is a kdump boot. That is doable, though
I feel it is better for the kdump kernel to sit in a black box, with the information it
can use, just like the 1st kernel. The question here is where we choose to cook
the memory information: in the boot loader or in the kernel itself.

> 
> > Is it possible to modify uefi memmap for kdump case?
> 
> Technically it would be possible, however I don't think it's necessary,
> and I think it would be disadvantageous to do so.
> 
> Describing the range(s) the crash kernel can use in separate properties
> under /chosen has a number of advantages.

OK, I get the point. We have an is_kdump_kernel() that checks if there is an
elfcorehdr_addr on the kernel cmdline. This is mainly for some drivers which
do not work well in the kdump kernel for some uncertain reasons. But ideally I
think the kernel should handle things just like in the 1st kernel and avoid using
it.

> 
> > > In the !EFI case, we use the memory nodes in the DTB. Only in this case
> > > could usable-memory properties in memory nodes make sense. I'd prefer a
> > > uniform property under /chosen for both cases.
> > 
> > We stil use same DTB, need to modify the DT and update the usable and unusable
> > nodes for kdump?
> 
> We'd have a (slightly) modified DTB that contained additional properties
> describing the range(s) reserved for use by the crash kernel.
> 
> Other than those properties under /chosen (e.g. the command line, initrd
> pointers if any), it would be the original DTB.
> 
> Thanks,
> Mark.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  2:49                     ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  2:49 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On 01/19/16 at 02:01pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> > On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > > >entries.
> > > > > 
> > > > > Thanks. I will visit x86 code again.
> > > > > 
> > > > > >I think it is X86 specific. Personally I think device tree property is
> > > > > >better.
> > > > > 
> > > > > Do you think so?
> > > > 
> > > > I'm not sure it is the best way. For X86 we run into problem with
> > > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > > to 2nd kernel we find that cmdline[] array is not big enough.
> > > 
> > > I'm not sure how PCI ranges relate to the memory map used for normal
> > > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> > 
> > Here is the old patch which was rejected in kexec-tools:
> > http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> > 
> > > 
> > > If the kernel got the rest of its system topology from DT, the PCI
> > > regions would be described there.
> > 
> > Yes, if kdump kernel use same DT as 1st kernel.
> 
> Other than for testing purposes, I don't see why you'd pass the kdump
> kernel a DTB inconsistent with that the 1st kernel was passsed (other
> than some proerties under /chosen).
> 
> We added /sys/firmware/fdt specifically to allow the kexec tools to get
> the exact DTB the first kernel used. There's no reason for tools to have
> to make something up.

Agreed, but kexec-tools has an option to pass in any dtb file. Who knows
how people will use it, unless we drop the option and use /sys/firmware/fdt
unconditionally.

If we choose to implement only kexec_file_load in the kernel, the interfaces
provided are kernel, initrd and cmdline. We can always use the same dtb.
 
> 
> > > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > > via UEFI memmap?
> > > 
> > > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > > acquring the relevant information and passing that to the first kernel
> > > in the DTB (see Documentation/arm/uefi.txt).
> > 
> > Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> > instead of memory nodes details.. 
> 
> When booted via EFI, yes.
> 
> For NUMA topology in !ACPI kernels, we might need to also retain and
> parse memory nodes, but only for toplogy information. The kernel would
> still only use memory as described by the EFI memory map.
> 
> There's a horrible edge case I've spotted if performing a chain of
> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> respect the EFI memory map so as to avoid corrupting it for the
> subsequent LE kernel. Other than this I believe everything should just
> work.

Firmware does not know the kernel's endianness; the kernel should respect the
firmware maps and adapt to them. It sounds like a generic issue, not specific to kexec.

> 
> > > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > > UEFI tables in memory are the same, it would be the same as a cold boot.
> > 
> > For kexec all memory ranges are same, for kdump we need use original reserved
> > range with crashkernel= as usable memory and all other orignal usable ranges
> > are not usable anymore. 
> 
> Sure. This is what I believe we should expose with an additional
> property under /chosen, while keeping everything else pristine.
> 
> The crash kernel can then limit itself to that region, while it would
> have the information of the full memory map (which it could log and/or
> use to drive other dumping).

In this way the kernel has to be aware that it is a kdump boot. That is doable, though
I feel it is better for the kdump kernel to sit in a black box, with the information it
can use, just like the 1st kernel. The question here is where we choose to cook
the memory information: in the boot loader or in the kernel itself.

> 
> > Is it possible to modify uefi memmap for kdump case?
> 
> Technically it would be possible, however I don't think it's necessary,
> and I think it would be disadvantageous to do so.
> 
> Describing the range(s) the crash kernel can use in separate properties
> under /chosen has a number of advantages.

OK, I get the point. We have an is_kdump_kernel() that checks if there is an
elfcorehdr_addr on the kernel cmdline. This is mainly for some drivers which
do not work well in the kdump kernel for some uncertain reasons. But ideally I
think the kernel should handle things just like in the 1st kernel and avoid using
it.

> 
> > > In the !EFI case, we use the memory nodes in the DTB. Only in this case
> > > could usable-memory properties in memory nodes make sense. I'd prefer a
> > > uniform property under /chosen for both cases.
> > 
> > We stil use same DTB, need to modify the DT and update the usable and unusable
> > nodes for kdump?
> 
> We'd have a (slightly) modified DTB that contained additional properties
> describing the range(s) reserved for use by the crash kernel.
> 
> Other than those properties under /chosen (e.g. the command line, initrd
> pointers if any), it would be the original DTB.
> 
> Thanks,
> Mark.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 14:05               ` Mark Rutland
@ 2016-01-20  2:54                 ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  2:54 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 02:05pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:52:33PM +0800, Dave Young wrote:
> > On 01/19/16 at 12:17pm, Mark Rutland wrote:
> > > On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> > > > On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > > > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > > > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > > > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > > >>
> > > > > >>This patch adds arch specific descriptions about kdump usage on arm64
> > > > > >>to kdump.txt.
> > > > > >>
> > > > > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > > >>---
> > > > > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > > > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > > > >>
> > > > > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > > > >>index bc4bd5a..36cf978 100644
> > > > > >>--- a/Documentation/kdump/kdump.txt
> > > > > >>+++ b/Documentation/kdump/kdump.txt
> > > > > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > > > >>  a remote system.
> > > > > >>
> > > > > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > > > >>-s390x and arm architectures.
> > > > > >>+s390x, arm and arm64 architectures.
> > > > > >>
> > > > > >>  When the system kernel boots, it reserves a small section of memory for
> > > > > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > > > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > > > >>
> > > > > >>      AUTO_ZRELADDR=y
> > > > > >>
> > > > > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > > > >>+----------------------------------------------------------
> > > > > >>+
> > > > > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > > > >>+   specifying:
> > > > > >>+
> > > > > >>+   mem=X[MG]
> > > > > >>+
> > > > > >>+   where X should be less than or equal to the size in "crashkernel="
> > > > > >>+   boot parameter. Kexec-tools will automatically add this.
> > > > > >
> > > > > >
> > > > > >This is extremely fragile, and will trivially fail when the kernel can
> > > > > >be loaded anywhere (see [1]).
> > > > > 
> > > > > As I said before, this restriction also exists on arm, but I understand
> > > > > that recent Ard's patches break it.
> > > > > 
> > > > > >We must explicitly describe the set of regions the crash kernel may use
> > > > > >(i.e. we need base and size). NAK in the absence of that.
> > > > > 
> > > > > There seem to exist several approaches:
> > > > > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > > > >     under "memory" node
> > > > > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > > > 
> > > > > Power PC takes (a), while this does not work on efi-started kernel
> > > > > because dtb has no "memory" nodes under efi.
> > > > > X86 takes (b). If we take this, we will need to overwrite a weak
> > > > > early_init_dt_add_memory().
> > > > 
> > > > X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > are over E820 limitation then turn to use setup_data list for remain
> > > > entries.
> > > 
> > > This would imply modifying the EFI memory map or the memory nodes, which
> > > I'm not keen on.
> > > 
> > > I would prefer that they are left _pristine_, and we describe the
> > > restriction on the kdump kernel with additional properties under
> > > /chosen.
> > > 
> > > That leaves us with more useful information about the environment of the
> > > first kernel, is simpler for userspace (it's resilient to updates to the
> > > UEFI memory map spec, for example), and is simple for the crash kernel.
> > 
> > In theory kexec, as the boot loader, should prepare a correct efi memmap and pass
> > it to the kernel, but as you said, yes, it will increase complexity. We need to balance
> > them.
> 
> I'd argue that the "correct efi memmap" is what we were given by the
> firmware initially -- none of that information is any less true.

In X86 the boot loader will cook an E820 map for the kernel to use; there's no such need
in arm, so maybe it is acceptable to use the same memmap and avoid modifying it only
for kdump.

I will not insist, though I would prefer doing this in the
bootloader instead of in the kernel.

> 
> For kdump all we need to ensure is that the kdump kernel only uses the
> memory that was specially reserved for it by the first kernel. The
> simplest way of doing that is to tell the kdump kernel which specific
> region(s) of memory were reserved for it, leaving the EFI memory map
> alone.

Yes, agreed that it is simpler.

> 
> Thanks,
> Mark.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  2:54                 ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  2:54 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, linux-arm-kernel, marc.zyngier,
	kexec, christoffer.dall

On 01/19/16 at 02:05pm, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:52:33PM +0800, Dave Young wrote:
> > On 01/19/16 at 12:17pm, Mark Rutland wrote:
> > > On Tue, Jan 19, 2016 at 09:43:32AM +0800, Dave Young wrote:
> > > > On 01/18/16 at 07:26pm, AKASHI Takahiro wrote:
> > > > > On 01/16/2016 05:16 AM, Mark Rutland wrote:
> > > > > >On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
> > > > > >>From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > > >>
> > > > > >>This patch adds arch specific descriptions about kdump usage on arm64
> > > > > >>to kdump.txt.
> > > > > >>
> > > > > >>Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> > > > > >>---
> > > > > >>  Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
> > > > > >>  1 file changed, 22 insertions(+), 1 deletion(-)
> > > > > >>
> > > > > >>diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > > > > >>index bc4bd5a..36cf978 100644
> > > > > >>--- a/Documentation/kdump/kdump.txt
> > > > > >>+++ b/Documentation/kdump/kdump.txt
> > > > > >>@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
> > > > > >>  a remote system.
> > > > > >>
> > > > > >>  Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
> > > > > >>-s390x and arm architectures.
> > > > > >>+s390x, arm and arm64 architectures.
> > > > > >>
> > > > > >>  When the system kernel boots, it reserves a small section of memory for
> > > > > >>  the dump-capture kernel. This ensures that ongoing Direct Memory Access
> > > > > >>@@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
> > > > > >>
> > > > > >>      AUTO_ZRELADDR=y
> > > > > >>
> > > > > >>+Dump-capture kernel config options (Arch Dependent, arm64)
> > > > > >>+----------------------------------------------------------
> > > > > >>+
> > > > > >>+1) The maximum memory size on the dump-capture kernel must be limited by
> > > > > >>+   specifying:
> > > > > >>+
> > > > > >>+   mem=X[MG]
> > > > > >>+
> > > > > >>+   where X should be less than or equal to the size in "crashkernel="
> > > > > >>+   boot parameter. Kexec-tools will automatically add this.
> > > > > >
> > > > > >
> > > > > >This is extremely fragile, and will trivially fail when the kernel can
> > > > > >be loaded anywhere (see [1]).
> > > > > 
> > > > > As I said before, this restriction also exists on arm, but I understand
> > > > > that recent Ard's patches break it.
> > > > > 
> > > > > >We must explicitly describe the set of regions the crash kernel may use
> > > > > >(i.e. we need base and size). NAK in the absence of that.
> > > > > 
> > > > > There seem to exist several approaches:
> > > > > (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
> > > > >     under "memory" node
> > > > > (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
> > > > > 
> > > > > Power PC takes (a), while this does not work on efi-started kernel
> > > > > because dtb has no "memory" nodes under efi.
> > > > > X86 takes (b). If we take this, we will need to overwrite a weak
> > > > > early_init_dt_add_memory().
> > > > 
> > > > X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > are over E820 limitation then turn to use setup_data list for remain
> > > > entries.
> > > 
> > > This would imply modifying the EFI memory map or the memory nodes, which
> > > I'm not keen on.
> > > 
> > > I would prefer that they are left _pristine_, and we describe the
> > > restriction on the kdump kernel with additional properties under
> > > /chosen.
> > > 
> > > That leaves us with more useful information about the environment of the
> > > first kernel, is simpler for userspace (it's resilient to updates to the
> > > UEFI memory map spec, for example), and is simple for the crash kernel.
> > 
> > In theory kexec, as the boot loader, should prepare a correct efi memmap and pass
> > it to the kernel, but as you said, yes, it will increase complexity. We need to balance
> > them.
> 
> I'd argue that the "correct efi memmap" is what we were given by the
> firmware initially -- none of that information is any less true.

In X86 the boot loader will cook an E820 map for the kernel to use; there's no such need
in arm, so maybe it is acceptable to use the same memmap and avoid modifying it only
for kdump.

I will not insist, though I would prefer doing this in the
bootloader instead of in the kernel.

> 
> For kdump all we need to ensure is that the kdump kernel only uses the
> memory that was specially reserved for it by the first kernel. The
> simplest way of doing that is to tell the kdump kernel which specific
> region(s) of memory were reserved for it, leaving the EFI memory map
> alone.

Yes, agreed that it is simpler.

> 
> Thanks,
> Mark.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-01-20  0:15     ` Geoff Levand
@ 2016-01-20  2:56       ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  2:56 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/16 at 04:15pm, Geoff Levand wrote:
> On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> > Geoff, another question about kexec-tools part is, can the kexec
> > -tools code
> > been written in kernel? We have the infrastructure for kexec_file_load.
> 
> I see no technical reason why the arm64 kernel cannot support
> kexec_file_load.

Cool, care to port it to the kernel, so that we have only kexec_file_load on arm64
and do not need to support both kexec_load and kexec_file_load?

> 
> -Geoff
> 

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-20  2:56       ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  2:56 UTC (permalink / raw)
  To: Geoff Levand
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/19/16 at 04:15pm, Geoff Levand wrote:
> On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> > Geoff, another question about kexec-tools part is, can the kexec
> > -tools code
> > been written in kernel? We have the infrastructure for kexec_file_load.
> 
> I see no technical reason why the arm64 kernel cannot support
> kexec_file_load.

Cool, care to port it to the kernel, so that we have only kexec_file_load on arm64
and do not need to support both kexec_load and kexec_file_load?

> 
> -Geoff
> 

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 12:10             ` Mark Rutland
@ 2016-01-20  4:34               ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  4:34 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/2016 09:10 PM, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 02:31:05PM +0900, AKASHI Takahiro wrote:
>> On 01/18/2016 08:29 PM, Mark Rutland wrote:
>>> On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
>>>> On 01/16/2016 05:16 AM, Mark Rutland wrote:
>>>>> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>>>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>>>
>>>>>> This patch adds arch specific descriptions about kdump usage on arm64
>>>>>> to kdump.txt.
>>>>>>
>>>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>>> ---
>>>>>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>>>>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>>>>>> index bc4bd5a..36cf978 100644
>>>>>> --- a/Documentation/kdump/kdump.txt
>>>>>> +++ b/Documentation/kdump/kdump.txt
>>>>>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>>>>>   a remote system.
>>>>>>
>>>>>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>>>>>> -s390x and arm architectures.
>>>>>> +s390x, arm and arm64 architectures.
>>>>>>
>>>>>>   When the system kernel boots, it reserves a small section of memory for
>>>>>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>>>>>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>>>>>
>>>>>>       AUTO_ZRELADDR=y
>>>>>>
>>>>>> +Dump-capture kernel config options (Arch Dependent, arm64)
>>>>>> +----------------------------------------------------------
>>>>>> +
>>>>>> +1) The maximum memory size on the dump-capture kernel must be limited by
>>>>>> +   specifying:
>>>>>> +
>>>>>> +   mem=X[MG]
>>>>>> +
>>>>>> +   where X should be less than or equal to the size in "crashkernel="
>>>>>> +   boot parameter. Kexec-tools will automatically add this.
>>>>>
>>>>>
>>>>> This is extremely fragile, and will trivially fail when the kernel can
>>>>> be loaded anywhere (see [1]).
>>>>
>>>> As I said before, this restriction also exists on arm, but I understand
>>>> that recent Ard's patches break it.
>>>>
>>>>> We must explicitly describe the set of regions the crash kernel may use
>>>>> (i.e. we need base and size). NAK in the absence of that.
>>>>
>>>> There seem to exist several approaches:
>>>> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>>>
>>> I'm not opposed to the idea of a DT property, though I think that should
>>> live under /chosen.
>>
>> In fact, powerpc uses another property, "linux,crashkernel-base(& size)",
>> under /chosen in order for the *1st kernel* to export info about a memory
>> region for the 2nd(crash dump) kernel to user apps (kexec-tools).
>
> Do you mean that said property is provided _to_ the 1st kernel, or
> provided _by_ the first kernel?

_by_ the 1st kernel.

Based on a kernel parameter, "crashkernel=", the 1st kernel reserves a
memory region at boot time and exports its information through this property.
Most architectures other than powerpc, however, use an iomem resource entry,
"Crash kernel", in /proc/iomem instead for this purpose.

>>> I see that "linux,usable-memory" exists already, though I'm confused as
>>> to exactly what it is for as there is no documentation (neither in the
>>> kernel nor in ePAPR).
>>
>> For example,
>>    memory@0x80000000 {
>>      reg = <0x0 0x80000000 0x0 0x80000000>;
>>      linux,usable-memory = <0x0 0x8c000000 0x0 0x4000000>;
>>    }
>> There is 2GB of memory available on the system, but only the 64MB region at
>> 0x8c000000 can be used as system RAM. See early_init_dt_scan_memory() in fdt.c.
>
> Sure, except that's the implementation rather than the intended
> semantics (which are not defined).

Yeah, but the code itself was ack'ed (actually committed) by Grant:)

>>> It's also painful to alter multiple memory nodes
>>> to use that, and I can see that going wrong.
>>
>> Yeah, I implemented this feature in my old versions experimentally,
>> but didn't like it as we had to touch all the memory nodes.
>>
>>>>      under "memory" node
>>>> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
>>>
>>> I'm not too keen on this, as I think it's fragile, and logically
>>> somewhat distinct from what mem= is for (a best effort testing tool).
>>
>> I'm not sure whether it is fragile, and contrary to x86, as Dave
>> described, I think we will only need a single memmap= on arm64 as
>> efi's mem map table is accessible even on the crash kernel.
>
> I just realised I misread this as "mem=", apologies.
>
> It looks like memmap= to force a specific region of memory to be used
> may work.
>
> I'd still err on the side of preferring an explicit property in the DT.

Let's discuss in succeeding replies.

>>>> Power PC takes (a), while this does not work on efi-started kernel
>>>> because dtb has no "memory" nodes under efi.
>>>
>>> A property under /chosen would work for EFI too.
>>>
>>>> X86 takes (b). If we take this, we will need to overwrite a weak
>>>> early_init_dt_add_memory().
>>>> (I thought that this approach was not smart as we have three different
>>>> ways to specify memory regions, dtb, efi and this kernel parameter.)
>>>
>>> I'm not sure that's a big problem. We may be able to make this generic,
>>> also.
>>>
>>> We don't necessarily need a weak add memory function if we can guarantee
>>> nothing gets memblock_alloc'd before we carve it out.
>>>
>>> Something like the nomap stuff Ard put together might be useful here.
>>
>> I'm afraid it doesn't work.
>> It doesn't matter whether it is linearly mapped or not. We should prevent
>> any part of memory regions used by the 1st kernel from being reclaimed
>> by memblock_alloc() and others.
>
> Are you certain that nomap memory can be allocated? That sounds like a
> major bug.

I misunderstood. __next_mem_range() called by mem_alloc stuff has some check.

-Takahiro AKASHI

> Nomap memory should act like reserved memory with the additional
> property that the kernel must not map it implicitly.
>
>> Or do you mean we can introduce another memblock flag?
>
> That wasn't what I meant, but that would be a potential solution.
>
> Thanks,
> Mark.
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  4:34               ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  4:34 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, linux-arm-kernel, kexec,
	christoffer.dall

On 01/19/2016 09:10 PM, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 02:31:05PM +0900, AKASHI Takahiro wrote:
>> On 01/18/2016 08:29 PM, Mark Rutland wrote:
>>> On Mon, Jan 18, 2016 at 07:26:04PM +0900, AKASHI Takahiro wrote:
>>>> On 01/16/2016 05:16 AM, Mark Rutland wrote:
>>>>> On Fri, Jan 15, 2016 at 07:18:38PM +0000, Geoff Levand wrote:
>>>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>>>
>>>>>> This patch adds arch specific descriptions about kdump usage on arm64
>>>>>> to kdump.txt.
>>>>>>
>>>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>>> ---
>>>>>>   Documentation/kdump/kdump.txt | 23 ++++++++++++++++++++++-
>>>>>>   1 file changed, 22 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>>>>>> index bc4bd5a..36cf978 100644
>>>>>> --- a/Documentation/kdump/kdump.txt
>>>>>> +++ b/Documentation/kdump/kdump.txt
>>>>>> @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
>>>>>>   a remote system.
>>>>>>
>>>>>>   Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
>>>>>> -s390x and arm architectures.
>>>>>> +s390x, arm and arm64 architectures.
>>>>>>
>>>>>>   When the system kernel boots, it reserves a small section of memory for
>>>>>>   the dump-capture kernel. This ensures that ongoing Direct Memory Access
>>>>>> @@ -249,6 +249,20 @@ Dump-capture kernel config options (Arch Dependent, arm)
>>>>>>
>>>>>>       AUTO_ZRELADDR=y
>>>>>>
>>>>>> +Dump-capture kernel config options (Arch Dependent, arm64)
>>>>>> +----------------------------------------------------------
>>>>>> +
>>>>>> +1) The maximum memory size on the dump-capture kernel must be limited by
>>>>>> +   specifying:
>>>>>> +
>>>>>> +   mem=X[MG]
>>>>>> +
>>>>>> +   where X should be less than or equal to the size in "crashkernel="
>>>>>> +   boot parameter. Kexec-tools will automatically add this.
>>>>>
>>>>>
>>>>> This is extremely fragile, and will trivially fail when the kernel can
>>>>> be loaded anywhere (see [1]).
>>>>
>>>> As I said before, this restriction also exists on arm, but I understand
>>>> that recent Ard's patches break it.
>>>>
>>>>> We must explicitly describe the set of regions the crash kernel may use
>>>>> (i.e. we need base and size). NAK in the absence of that.
>>>>
>>>> There seem to exist several approaches:
>>>> (a) use a device-tree property, "linux,usable-memory", in addition to "reg"
>>>
>>> I'm not opposed to the idea of a DT property, though I think that should
>>> live under /chosen.
>>
>> In fact, powerpc uses another property, "linux,crashkernel-base(& size)",
>> under /chosen in order for the *1st kernel* to export info about a memory
>> region for the 2nd(crash dump) kernel to user apps (kexec-tools).
>
> Do you mean that said property is provided _to_ the 1st kernel, or
> provided _by_ the first kernel?

_by_ the 1st kernel.

Based on a kernel parameter, "crashkernel=", the 1st kernel reserves a
memory region at boot time and exports its information through this property.
Most architectures other than powerpc, however, use an iomem resource entry,
"Crash kernel", in /proc/iomem instead for this purpose.

>>> I see that "linux,usable-memory" exists already, though I'm confused as
>>> to exactly what it is for as there is no documentation (neither in the
>>> kernel nor in ePAPR).
>>
>> For example,
>>    memory@0x80000000 {
>>      reg = <0x0 0x80000000 0x0 0x80000000>;
>>      linux,usable-memory = <0x0 0x8c000000 0x0 0x4000000>;
>>    }
>> There is 2GB of memory available on the system, but only the 64MB region at
>> 0x8c000000 can be used as system RAM. See early_init_dt_scan_memory() in fdt.c.
>
> Sure, except that's the implementation rather than the intended
> semantics (which are not defined).

Yeah, but the code itself was ack'ed (actually committed) by Grant:)

>>> It's also painful to alter multiple memory nodes
>>> to use that, and I can see that going wrong.
>>
>> Yeah, I implemented this feature in my old versions experimentally,
>> but didn't like it as we had to touch all the memory nodes.
>>
>>>>      under "memory" node
>>>> (b) use a kernel's early parameter, "memmap=nn[@#$]ss"
>>>
>>> I'm not too keen on this, as I think it's fragile, and logically
>>> somewhat distinct from what mem= is for (a best effort testing tool).
>>
>> I'm not sure whether it is fragile, and contrary to x86, as Dave
>> described, I think we will only need a single memmap= on arm64 as
>> efi's mem map table is accessible even on the crash kernel.
>
> I just realised I misread this as "mem=", apologies.
>
> It looks like memmap= to force a specific region of memory to be used
> may work.
>
> I'd still err on the side of preferring an explicit property in the DT.

Let's discuss in succeeding replies.

>>>> Power PC takes (a), while this does not work on efi-started kernel
>>>> because dtb has no "memory" nodes under efi.
>>>
>>> A property under /chosen would work for EFI too.
>>>
>>>> X86 takes (b). If we take this, we will need to overwrite a weak
>>>> early_init_dt_add_memory().
>>>> (I thought that this approach was not smart as we have three different
>>>> ways to specify memory regions, dtb, efi and this kernel parameter.)
>>>
>>> I'm not sure that's a big problem. We may be able to make this generic,
>>> also.
>>>
>>> We don't necessarily need a weak add memory function if we can guarantee
>>> nothing gets memblock_alloc'd before we carve it out.
>>>
>>> Something like the nomap stuff Ard put together might be useful here.
>>
>> I'm afraid it doesn't work.
>> It doesn't matter whether it is linearly mapped or not. We should prevent
>> any part of memory regions used by the 1st kernel from being reclaimed
>> by memblock_alloc() and others.
>
> Are you certain that nomap memory can be allocated? That sounds like a
> major bug.

I misunderstood. __next_mem_range() called by mem_alloc stuff has some check.

-Takahiro AKASHI

> Nomap memory should act like reserved memory with the additional
> property that the kernel must not map it implicitly.
>
>> Or do you mean we can introduce another memblock flag?
>
> That wasn't what I meant, but that would be a potential solution.
>
> Thanks,
> Mark.
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-19 14:01                   ` Mark Rutland
@ 2016-01-20  5:25                     ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  5:25 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/19/2016 11:01 PM, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>> entries.
>>>>>
>>>>> Thanks. I will visit x86 code again.
>>>>>
>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>> better.
>>>>>
>>>>> Do you think so?
>>>>
>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>
>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>
>> Here is the old patch which was rejected in kexec-tools:
>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>
>>>
>>> If the kernel got the rest of its system topology from DT, the PCI
>>> regions would be described there.
>>
>> Yes, if kdump kernel use same DT as 1st kernel.
>
> Other than for testing purposes, I don't see why you'd pass the kdump
> kernel a DTB inconsistent with the one the 1st kernel was passed (other
> than some properties under /chosen).
>
> We added /sys/firmware/fdt specifically to allow the kexec tools to get
> the exact DTB the first kernel used. There's no reason for tools to have
> to make something up.

Currently, arm64 kexec-tools modifies only a cmdline property in dtb
to pass a "elfcorehdr=" parameter as well as other restrictions (like maxcpus=1).

>>>> Do you think for arm64 only usable memory is necessary to let kdump kernel
>>>> know? I'm curious about how arm64 kernel get all memory layout from boot loader,
>>>> via UEFI memmap?
>>>
>>> When booted via EFI, we use the EFI memory map. The EFI stub handles
>>> acquiring the relevant information and passing that to the first kernel
>>> in the DTB (see Documentation/arm/uefi.txt).
>>
>> Ok, thanks for the pointer. So in the DT we just have the UEFI memmap information
>> instead of memory node details.
>
> When booted via EFI, yes.
>
> For NUMA topology in !ACPI kernels, we might need to also retain and
> parse memory nodes, but only for topology information. The kernel would
> still only use memory as described by the EFI memory map.
>
> There's a horrible edge case I've spotted if performing a chain of
> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> respect the EFI memory map so as to avoid corrupting it for the
> subsequent LE kernel. Other than this I believe everything should just
> work.

BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
(as in the case of LE -> LE) and require users to provide a dtb file explicitly.

For BE -> LE, the BE kernel doesn't know whether a UEFI memmap table is available or not,
and so uses the same (explicitly-provided) dtb (as LE -> LE in !UEFI).

>>> A kexec'd kernel should simply inherit that. So long as the DTB and/or
>>> UEFI tables in memory are the same, it would be the same as a cold boot.
>>
>> For kexec all memory ranges are the same; for kdump we need to use the original range reserved
>> with crashkernel= as usable memory, and all other original usable ranges
>> are not usable anymore.
>
> Sure. This is what I believe we should expose with an additional
> property under /chosen, while keeping everything else pristine.
>
> The crash kernel can then limit itself to that region, while it would
> have the information of the full memory map (which it could log and/or
> use to drive other dumping).

FYI,
all the original usable memory regions used by the 1st kernel are also
described in an ELF core header specified by "elfcorehdr=" parameter to
the crash dump kernel.

-Takahiro AKASHI

>> Is it possible to modify uefi memmap for kdump case?
>
> Technically it would be possible, however I don't think it's necessary,
> and I think it would be disadvantageous to do so.
>
> Describing the range(s) the crash kernel can use in separate properties
> under /chosen has a number of advantages.
>
>>> In the !EFI case, we use the memory nodes in the DTB. Only in this case
>>> could usable-memory properties in memory nodes make sense. I'd prefer a
>>> uniform property under /chosen for both cases.
>>
>> We stil use same DTB, need to modify the DT and update the usable and unusable
>> nodes for kdump?
>
> We'd have a (slightly) modified DTB that contained additional properties
> describing the range(s) reserved for use by the crash kernel.
>
> Other than those properties under /chosen (e.g. the command line, initrd
> pointers if any), it would be the original DTB.
>
> Thanks,
> Mark.
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  5:25                     ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  5:25 UTC (permalink / raw)
  To: Mark Rutland, Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/19/2016 11:01 PM, Mark Rutland wrote:
> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>> entries.
>>>>>
>>>>> Thanks. I will visit x86 code again.
>>>>>
>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>> better.
>>>>>
>>>>> Do you think so?
>>>>
>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>
>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>
>> Here is the old patch which was rejected in kexec-tools:
>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>
>>>
>>> If the kernel got the rest of its system topology from DT, the PCI
>>> regions would be described there.
>>
>> Yes, if kdump kernel use same DT as 1st kernel.
>
> Other than for testing purposes, I don't see why you'd pass the kdump
> kernel a DTB inconsistent with that the 1st kernel was passed (other
> than some properties under /chosen).
>
> We added /sys/firmware/fdt specifically to allow the kexec tools to get
> the exact DTB the first kernel used. There's no reason for tools to have
> to make something up.

Currently, arm64 kexec-tools modifies only a cmdline property in dtb
to pass a "elfcorehdr=" parameter as well as other restrictions (like maxcpus=1).

>>>> Do you think for arm64 only usable memory is necessary to let kdump kernel
>>>> know? I'm curious about how arm64 kernel get all memory layout from boot loader,
>>>> via UEFI memmap?
>>>
>>> When booted via EFI, we use the EFI memory map. The EFI stub handles
>>> acquiring the relevant information and passing that to the first kernel
>>> in the DTB (see Documentation/arm/uefi.txt).
>>
>> Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
>> instead of memory nodes details..
>
> When booted via EFI, yes.
>
> For NUMA topology in !ACPI kernels, we might need to also retain and
> parse memory nodes, but only for topology information. The kernel would
> still only use memory as described by the EFI memory map.
>
> There's a horrible edge case I've spotted if performing a chain of
> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> respect the EFI memory map so as to avoid corrupting it for the
> subsequent LE kernel. Other than this I believe everything should just
> work.

BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
(as in the case of LE -> LE) and require users to provide a dtb file explicitly.

For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
and so uses the same (explicitly-provided) dtb (as LE -> LE in !UEFI)

>>> A kexec'd kernel should simply inherit that. So long as the DTB and/or
>>> UEFI tables in memory are the same, it would be the same as a cold boot.
>>
>> For kexec all memory ranges are same, for kdump we need use original reserved
>> range with crashkernel= as usable memory and all other orignal usable ranges
>> are not usable anymore.
>
> Sure. This is what I believe we should expose with an additional
> property under /chosen, while keeping everything else pristine.
>
> The crash kernel can then limit itself to that region, while it would
> have the information of the full memory map (which it could log and/or
> use to drive other dumping).

FYI,
all the original usable memory regions used by the 1st kernel are also
described in an ELF core header specified by "elfcorehdr=" parameter to
the crash dump kernel.

-Takahiro AKASHI

>> Is it possible to modify uefi memmap for kdump case?
>
> Technically it would be possible, however I don't think it's necessary,
> and I think it would be disadvantageous to do so.
>
> Describing the range(s) the crash kernel can use in separate properties
> under /chosen has a number of advantages.
>
>>> In the !EFI case, we use the memory nodes in the DTB. Only in this case
>>> could usable-memory properties in memory nodes make sense. I'd prefer a
>>> uniform property under /chosen for both cases.
>>
>> We stil use same DTB, need to modify the DT and update the usable and unusable
>> nodes for kdump?
>
> We'd have a (slightly) modified DTB that contained additional properties
> describing the range(s) reserved for use by the crash kernel.
>
> Other than those properties under /chosen (e.g. the command line, initrd
> pointers if any), it would be the original DTB.
>
> Thanks,
> Mark.
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  2:49                     ` Dave Young
@ 2016-01-20  6:07                       ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  6:07 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/2016 11:49 AM, Dave Young wrote:
> On 01/19/16 at 02:01pm, Mark Rutland wrote:
>> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>>> entries.
>>>>>>
>>>>>> Thanks. I will visit x86 code again.
>>>>>>
>>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>>> better.
>>>>>>
>>>>>> Do you think so?
>>>>>
>>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>>
>>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>>
>>> Here is the old patch which was rejected in kexec-tools:
>>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>>
>>>>
>>>> If the kernel got the rest of its system topology from DT, the PCI
>>>> regions would be described there.
>>>
>>> Yes, if kdump kernel use same DT as 1st kernel.
>>
>> Other than for testing purposes, I don't see why you'd pass the kdump
>> kernel a DTB inconsistent with that the 1st kernel was passsed (other
>> than some proerties under /chosen).
>>
>> We added /sys/firmware/fdt specifically to allow the kexec tools to get
>> the exact DTB the first kernel used. There's no reason for tools to have
>> to make something up.
>
> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> how one will use it unless dropping the option and use /sys/firmware/fdt
> unconditionally.

As a matter of fact, specifying proper command line parameters as well as
dtb is partly users' responsibility for kdump to work correctly.
(especially for BE kernel)

> If we choose to implement kexec_file_load only in kernel, the interfaces
> provided are kernel, initrd and cmdline. We can always use same dtb.

I would say that we can always use the same dtb even with kexec_load
from user's perspective. Right?
(The difference is whether changes are made by kernel itself or kexec-tools.)

>>
>>>>> Do you think for arm64 only usable memory is necessary to let kdump kernel
>>>>> know? I'm curious about how arm64 kernel get all memory layout from boot loader,
>>>>> via UEFI memmap?
>>>>
>>>> When booted via EFI, we use the EFI memory map. The EFI stub handles
>>>> acquring the relevant information and passing that to the first kernel
>>>> in the DTB (see Documentation/arm/uefi.txt).
>>>
>>> Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
>>> instead of memory nodes details..
>>
>> When booted via EFI, yes.
>>
>> For NUMA topology in !ACPI kernels, we might need to also retain and
>> parse memory nodes, but only for toplogy information. The kernel would
>> still only use memory as described by the EFI memory map.
>>
>> There's a horrible edge case I've spotted if performing a chain of
>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>> respect the EFI memory map so as to avoid corrupting it for the
>> subsequent LE kernel. Other than this I believe everything should just
>> work.
>
> Firmware do not know kernel endianniess, kernel should respect firmware
> maps and adapt to it, it sounds like a generic issue not specfic to kexec.

On arm64, a kernel image header has a bit field to specify the image's endianness.
Anyway, our current implementation relies on a user-supplied dtb to start a BE kernel.

>>
>>>> A kexec'd kernel should simply inherit that. So long as the DTB and/or
>>>> UEFI tables in memory are the same, it would be the same as a cold boot.
>>>
>>> For kexec all memory ranges are same, for kdump we need use original reserved
>>> range with crashkernel= as usable memory and all other orignal usable ranges
>>> are not usable anymore.
>>
>> Sure. This is what I believe we should expose with an additional
>> property under /chosen, while keeping everything else pristine.
>>
>> The crash kernel can then limit itself to that region, while it would
>> have the information of the full memory map (which it could log and/or
>> use to drive other dumping).
>
> In this way kernel should be aware it is a kdump booting, it is doable though
> I feel it is better for kdump kernel in a black box with infomations it
> can use just like the 1st kernel. Things here is where we choose to cook
> the memory infomation in boot loader or in kernel itself.
>
>>
>>> Is it possible to modify uefi memmap for kdump case?
>>
>> Technically it would be possible, however I don't think it's necessary,
>> and I think it would be disadvantageous to do so.
>>
>> Describing the range(s) the crash kernel can use in separate properties
>> under /chosen has a number of advantages.
>
> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> do not work well in kdump kernel some uncertain reasons. But ideally I
> think kernel should handle things just like in 1st kernel and avoid to use
> it.

So I'm still not sure what the advantages of a property under /chosen are
over a "memmap=" kernel parameter.
Both are simple and can have the same effect while minimizing changes to the dtb.
(But if, in the latter case, we have to provide *all* the memory-related information
through "memmap=" parameters, it would be much more complicated.)

-Takahiro AKASHI

>>
>>>> In the !EFI case, we use the memory nodes in the DTB. Only in this case
>>>> could usable-memory properties in memory nodes make sense. I'd prefer a
>>>> uniform property under /chosen for both cases.
>>>
>>> We stil use same DTB, need to modify the DT and update the usable and unusable
>>> nodes for kdump?
>>
>> We'd have a (slightly) modified DTB that contained additional properties
>> describing the range(s) reserved for use by the crash kernel.
>>
>> Other than those properties under /chosen (e.g. the command line, initrd
>> pointers if any), it would be the original DTB.
>>
>> Thanks,
>> Mark.
>
> Thanks
> Dave
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  6:07                       ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  6:07 UTC (permalink / raw)
  To: Dave Young, Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/20/2016 11:49 AM, Dave Young wrote:
> On 01/19/16 at 02:01pm, Mark Rutland wrote:
>> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>>> entries.
>>>>>>
>>>>>> Thanks. I will visit x86 code again.
>>>>>>
>>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>>> better.
>>>>>>
>>>>>> Do you think so?
>>>>>
>>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>>
>>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>>
>>> Here is the old patch which was rejected in kexec-tools:
>>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>>
>>>>
>>>> If the kernel got the rest of its system topology from DT, the PCI
>>>> regions would be described there.
>>>
>>> Yes, if kdump kernel use same DT as 1st kernel.
>>
>> Other than for testing purposes, I don't see why you'd pass the kdump
>> kernel a DTB inconsistent with that the 1st kernel was passsed (other
>> than some proerties under /chosen).
>>
>> We added /sys/firmware/fdt specifically to allow the kexec tools to get
>> the exact DTB the first kernel used. There's no reason for tools to have
>> to make something up.
>
> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> how one will use it unless dropping the option and use /sys/firmware/fdt
> unconditionally.

As a matter of fact, specifying proper command line parameters as well as
dtb is partly users' responsibility for kdump to work correctly.
(especially for BE kernel)

> If we choose to implement kexec_file_load only in kernel, the interfaces
> provided are kernel, initrd and cmdline. We can always use same dtb.

I would say that we can always use the same dtb even with kexec_load
from user's perspective. Right?
(The difference is whether changes are made by kernel itself or kexec-tools.)

>>
>>>>> Do you think for arm64 only usable memory is necessary to let kdump kernel
>>>>> know? I'm curious about how arm64 kernel get all memory layout from boot loader,
>>>>> via UEFI memmap?
>>>>
>>>> When booted via EFI, we use the EFI memory map. The EFI stub handles
>>>> acquring the relevant information and passing that to the first kernel
>>>> in the DTB (see Documentation/arm/uefi.txt).
>>>
>>> Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
>>> instead of memory nodes details..
>>
>> When booted via EFI, yes.
>>
>> For NUMA topology in !ACPI kernels, we might need to also retain and
>> parse memory nodes, but only for toplogy information. The kernel would
>> still only use memory as described by the EFI memory map.
>>
>> There's a horrible edge case I've spotted if performing a chain of
>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>> respect the EFI memory map so as to avoid corrupting it for the
>> subsequent LE kernel. Other than this I believe everything should just
>> work.
>
> Firmware do not know kernel endianniess, kernel should respect firmware
> maps and adapt to it, it sounds like a generic issue not specfic to kexec.

On arm64, a kernel image header has a bit field to specify the image's endianness.
Anyway, our current implementation relies on a user-supplied dtb to start a BE kernel.

>>
>>>> A kexec'd kernel should simply inherit that. So long as the DTB and/or
>>>> UEFI tables in memory are the same, it would be the same as a cold boot.
>>>
>>> For kexec all memory ranges are same, for kdump we need use original reserved
>>> range with crashkernel= as usable memory and all other orignal usable ranges
>>> are not usable anymore.
>>
>> Sure. This is what I believe we should expose with an additional
>> property under /chosen, while keeping everything else pristine.
>>
>> The crash kernel can then limit itself to that region, while it would
>> have the information of the full memory map (which it could log and/or
>> use to drive other dumping).
>
> In this way kernel should be aware it is a kdump booting, it is doable though
> I feel it is better for kdump kernel in a black box with infomations it
> can use just like the 1st kernel. Things here is where we choose to cook
> the memory infomation in boot loader or in kernel itself.
>
>>
>>> Is it possible to modify uefi memmap for kdump case?
>>
>> Technically it would be possible, however I don't think it's necessary,
>> and I think it would be disadvantageous to do so.
>>
>> Describing the range(s) the crash kernel can use in separate properties
>> under /chosen has a number of advantages.
>
> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> do not work well in kdump kernel some uncertain reasons. But ideally I
> think kernel should handle things just like in 1st kernel and avoid to use
> it.

So I'm still not sure what the advantages of a property under /chosen are
over a "memmap=" kernel parameter.
Both are simple and can have the same effect while minimizing changes to the dtb.
(But if, in the latter case, we have to provide *all* the memory-related information
through "memmap=" parameters, it would be much more complicated.)

-Takahiro AKASHI

>>
>>>> In the !EFI case, we use the memory nodes in the DTB. Only in this case
>>>> could usable-memory properties in memory nodes make sense. I'd prefer a
>>>> uniform property under /chosen for both cases.
>>>
>>> We stil use same DTB, need to modify the DT and update the usable and unusable
>>> nodes for kdump?
>>
>> We'd have a (slightly) modified DTB that contained additional properties
>> describing the range(s) reserved for use by the crash kernel.
>>
>> Other than those properties under /chosen (e.g. the command line, initrd
>> pointers if any), it would be the original DTB.
>>
>> Thanks,
>> Mark.
>
> Thanks
> Dave
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  6:07                       ` AKASHI Takahiro
@ 2016-01-20  6:38                         ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  6:38 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/16 at 03:07pm, AKASHI Takahiro wrote:
> On 01/20/2016 11:49 AM, Dave Young wrote:
> >On 01/19/16 at 02:01pm, Mark Rutland wrote:
> >>On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> >>>On 01/19/16 at 12:51pm, Mark Rutland wrote:
> >>>>On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> >>>>>On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> >>>>>>On 01/19/2016 10:43 AM, Dave Young wrote:
> >>>>>>>X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >>>>>>>recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >>>>>>>are over E820 limitation then turn to use setup_data list for remain
> >>>>>>>entries.
> >>>>>>
> >>>>>>Thanks. I will visit x86 code again.
> >>>>>>
> >>>>>>>I think it is X86 specific. Personally I think device tree property is
> >>>>>>>better.
> >>>>>>
> >>>>>>Do you think so?
> >>>>>
> >>>>>I'm not sure it is the best way. For X86 we run into problem with
> >>>>>memmap= design, one example is pci domain X (X>1) need the pci memory
> >>>>>ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> >>>>>to 2nd kernel we find that cmdline[] array is not big enough.
> >>>>
> >>>>I'm not sure how PCI ranges relate to the memory map used for normal
> >>>>memory (i.e. RAM), though I'm probably missing some caveat with the way
> >>>>ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> >>>
> >>>Here is the old patch which was rejected in kexec-tools:
> >>>http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> >>>
> >>>>
> >>>>If the kernel got the rest of its system topology from DT, the PCI
> >>>>regions would be described there.
> >>>
> >>>Yes, if kdump kernel use same DT as 1st kernel.
> >>
> >>Other than for testing purposes, I don't see why you'd pass the kdump
> >>kernel a DTB inconsistent with that the 1st kernel was passsed (other
> >>than some proerties under /chosen).
> >>
> >>We added /sys/firmware/fdt specifically to allow the kexec tools to get
> >>the exact DTB the first kernel used. There's no reason for tools to have
> >>to make something up.
> >
> >Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> >how one will use it unless dropping the option and use /sys/firmware/fdt
> >unconditionally.
> 
> As a matter of fact, specifying proper command line parameters as well as
> dtb is partly users' responsibility for kdump to work correctly.
> (especially for BE kernel)

Right.

> 
> >If we choose to implement kexec_file_load only in kernel, the interfaces
> >provided are kernel, initrd and cmdline. We can always use same dtb.
> 
> I would say that we can always use the same dtb even with kexec_load
> from user's perspective. Right?
> (The difference is whether changes are made by kernel itself or kexec-tools.)

Right.

> 
> >>
> >>>>>Do you think for arm64 only usable memory is necessary to let kdump kernel
> >>>>>know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> >>>>>via UEFI memmap?
> >>>>
> >>>>When booted via EFI, we use the EFI memory map. The EFI stub handles
> >>>>acquring the relevant information and passing that to the first kernel
> >>>>in the DTB (see Documentation/arm/uefi.txt).
> >>>
> >>>Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> >>>instead of memory nodes details..
> >>
> >>When booted via EFI, yes.
> >>
> >>For NUMA topology in !ACPI kernels, we might need to also retain and
> >>parse memory nodes, but only for toplogy information. The kernel would
> >>still only use memory as described by the EFI memory map.
> >>
> >>There's a horrible edge case I've spotted if performing a chain of
> >>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>respect the EFI memory map so as to avoid corrupting it for the
> >>subsequent LE kernel. Other than this I believe everything should just
> >>work.
> >
> >Firmware do not know kernel endianniess, kernel should respect firmware
> >maps and adapt to it, it sounds like a generic issue not specfic to kexec.
> 
> On arm64, a kernel image header has a bit field to specify the image's endianness.
> Anyway, our current implementation replies on a user-supplied dtb to start BE kernel.

Ok, I mean the UEFI memmap is the same; it is not specific to LE or BE.

> 
> >>
> >>>>A kexec'd kernel should simply inherit that. So long as the DTB and/or
> >>>>UEFI tables in memory are the same, it would be the same as a cold boot.
> >>>
> >>>For kexec all memory ranges are same, for kdump we need use original reserved
> >>>range with crashkernel= as usable memory and all other orignal usable ranges
> >>>are not usable anymore.
> >>
> >>Sure. This is what I believe we should expose with an additional
> >>property under /chosen, while keeping everything else pristine.
> >>
> >>The crash kernel can then limit itself to that region, while it would
> >>have the information of the full memory map (which it could log and/or
> >>use to drive other dumping).
> >
> >In this way kernel should be aware it is a kdump booting, it is doable though
> >I feel it is better for kdump kernel in a black box with infomations it
> >can use just like the 1st kernel. Things here is where we choose to cook
> >the memory infomation in boot loader or in kernel itself.
> >
> >>
> >>>Is it possible to modify uefi memmap for kdump case?
> >>
> >>Technically it would be possible, however I don't think it's necessary,
> >>and I think it would be disadvantageous to do so.
> >>
> >>Describing the range(s) the crash kernel can use in separate properties
> >>under /chosen has a number of advantages.
> >
> >Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> >elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> >do not work well in kdump kernel some uncertain reasons. But ideally I
> >think kernel should handle things just like in 1st kernel and avoid to use
> >it.
> 
> So I'm not still sure about what are advantages of a property under /chosen
> over "memmap=" kernel parameter.
> Both are simple and can have the same effect with minimizing changes to dtb.
> (But if, in the latter case, we have to provide *all* the memory-related information
> through "memmap=" parameters, it would be much complicated.)

Maybe I did not say it clearly: I prefer the kexec syscall/tool to modify the dtb
or uefi-memmap so that we do not need any extra kernel cmdline.

For x86 we would like to drop the memmap= usage in kexec-tools but we cannot
do that because of a compatibility problem with the calgary iommu. So currently
kexec-tools supports both recreating E820 maps and passing memmap=.

We should think about it carefully because it will be hard to remove once we support it.
IMO handling it in code is better than using an external interface.

> 
> -Takahiro AKASHI
> 
> >>
> >>>>In the !EFI case, we use the memory nodes in the DTB. Only in this case
> >>>>could usable-memory properties in memory nodes make sense. I'd prefer a
> >>>>uniform property under /chosen for both cases.
> >>>
> >>>We stil use same DTB, need to modify the DT and update the usable and unusable
> >>>nodes for kdump?
> >>
> >>We'd have a (slightly) modified DTB that contained additional properties
> >>describing the range(s) reserved for use by the crash kernel.
> >>
> >>Other than those properties under /chosen (e.g. the command line, initrd
> >>pointers if any), it would be the original DTB.
> >>
> >>Thanks,
> >>Mark.
> >
> >Thanks
> >Dave
> >

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  6:38                         ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  6:38 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/20/16 at 03:07pm, AKASHI Takahiro wrote:
> On 01/20/2016 11:49 AM, Dave Young wrote:
> >On 01/19/16 at 02:01pm, Mark Rutland wrote:
> >>On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> >>>On 01/19/16 at 12:51pm, Mark Rutland wrote:
> >>>>On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> >>>>>On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> >>>>>>On 01/19/2016 10:43 AM, Dave Young wrote:
> >>>>>>>X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >>>>>>>recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >>>>>>>are over E820 limitation then turn to use setup_data list for remain
> >>>>>>>entries.
> >>>>>>
> >>>>>>Thanks. I will visit x86 code again.
> >>>>>>
> >>>>>>>I think it is X86 specific. Personally I think device tree property is
> >>>>>>>better.
> >>>>>>
> >>>>>>Do you think so?
> >>>>>
> >>>>>I'm not sure it is the best way. For X86 we run into problem with
> >>>>>memmap= design, one example is pci domain X (X>1) need the pci memory
> >>>>>ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> >>>>>to 2nd kernel we find that cmdline[] array is not big enough.
> >>>>
> >>>>I'm not sure how PCI ranges relate to the memory map used for normal
> >>>>memory (i.e. RAM), though I'm probably missing some caveat with the way
> >>>>ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> >>>
> >>>Here is the old patch which was rejected in kexec-tools:
> >>>http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> >>>
> >>>>
> >>>>If the kernel got the rest of its system topology from DT, the PCI
> >>>>regions would be described there.
> >>>
> >>>Yes, if kdump kernel use same DT as 1st kernel.
> >>
> >>Other than for testing purposes, I don't see why you'd pass the kdump
> >>kernel a DTB inconsistent with that the 1st kernel was passsed (other
> >>than some proerties under /chosen).
> >>
> >>We added /sys/firmware/fdt specifically to allow the kexec tools to get
> >>the exact DTB the first kernel used. There's no reason for tools to have
> >>to make something up.
> >
> >Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> >how one will use it unless dropping the option and use /sys/firmware/fdt
> >unconditionally.
> 
> As a matter of fact, specifying proper command line parameters as well as
> dtb is partly users' responsibility for kdump to work correctly.
> (especially for BE kernel)

Right.

> 
> >If we choose to implement kexec_file_load only in kernel, the interfaces
> >provided are kernel, initrd and cmdline. We can always use same dtb.
> 
> I would say that we can always use the same dtb even with kexec_load
> from user's perspective. Right?
> (The difference is whether changes are made by kernel itself or kexec-tools.)

Right.

> 
> >>
> >>>>>Do you think for arm64 only usable memory is necessary to let kdump kernel
> >>>>>know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> >>>>>via UEFI memmap?
> >>>>
> >>>>When booted via EFI, we use the EFI memory map. The EFI stub handles
> >>>>acquring the relevant information and passing that to the first kernel
> >>>>in the DTB (see Documentation/arm/uefi.txt).
> >>>
> >>>Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> >>>instead of memory nodes details..
> >>
> >>When booted via EFI, yes.
> >>
> >>For NUMA topology in !ACPI kernels, we might need to also retain and
> >>parse memory nodes, but only for toplogy information. The kernel would
> >>still only use memory as described by the EFI memory map.
> >>
> >>There's a horrible edge case I've spotted if performing a chain of
> >>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>respect the EFI memory map so as to avoid corrupting it for the
> >>subsequent LE kernel. Other than this I believe everything should just
> >>work.
> >
> >Firmware do not know kernel endianniess, kernel should respect firmware
> >maps and adapt to it, it sounds like a generic issue not specfic to kexec.
> 
> On arm64, a kernel image header has a bit field to specify the image's endianness.
> Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.

Ok, I mean the uefi memmap is the same, not specific to LE or BE.

> 
> >>
> >>>>A kexec'd kernel should simply inherit that. So long as the DTB and/or
> >>>>UEFI tables in memory are the same, it would be the same as a cold boot.
> >>>
> >>>For kexec all memory ranges are same, for kdump we need use original reserved
> >>>range with crashkernel= as usable memory and all other orignal usable ranges
> >>>are not usable anymore.
> >>
> >>Sure. This is what I believe we should expose with an additional
> >>property under /chosen, while keeping everything else pristine.
> >>
> >>The crash kernel can then limit itself to that region, while it would
> >>have the information of the full memory map (which it could log and/or
> >>use to drive other dumping).
> >
> >In this way kernel should be aware it is a kdump booting, it is doable though
> >I feel it is better for kdump kernel in a black box with infomations it
> >can use just like the 1st kernel. Things here is where we choose to cook
> >the memory infomation in boot loader or in kernel itself.
> >
> >>
> >>>Is it possible to modify uefi memmap for kdump case?
> >>
> >>Technically it would be possible, however I don't think it's necessary,
> >>and I think it would be disadvantageous to do so.
> >>
> >>Describing the range(s) the crash kernel can use in separate properties
> >>under /chosen has a number of advantages.
> >
> >Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> >elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> >do not work well in kdump kernel some uncertain reasons. But ideally I
> >think kernel should handle things just like in 1st kernel and avoid to use
> >it.
> 
> So I'm not still sure about what are advantages of a property under /chosen
> over "memmap=" kernel parameter.
> Both are simple and can have the same effect with minimizing changes to dtb.
> (But if, in the latter case, we have to provide *all* the memory-related information
> through "memmap=" parameters, it would be much complicated.)

Maybe I did not say it clearly: I prefer the kexec syscall/tool to modify the dtb
or uefi-memmap so that we do not need any extra kernel cmdline.

For x86 we would like to drop the memmap= usage in kexec-tools but we can not
do that for a compatibility problem about calgary iommu. So that currently
kexec-tools supports both recreating E820 maps and passing memmap=.

We should think it carefully because it will be hard to remove once we support it.
IMO handling it in code is better than using an external interface.

> 
> -Takahiro AKASHI
> 
> >>
> >>>>In the !EFI case, we use the memory nodes in the DTB. Only in this case
> >>>>could usable-memory properties in memory nodes make sense. I'd prefer a
> >>>>uniform property under /chosen for both cases.
> >>>
> >>>We stil use same DTB, need to modify the DT and update the usable and unusable
> >>>nodes for kdump?
> >>
> >>We'd have a (slightly) modified DTB that contained additional properties
> >>describing the range(s) reserved for use by the crash kernel.
> >>
> >>Other than those properties under /chosen (e.g. the command line, initrd
> >>pointers if any), it would be the original DTB.
> >>
> >>Thanks,
> >>Mark.
> >
> >Thanks
> >Dave
> >

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  6:38                         ` Dave Young
@ 2016-01-20  7:00                           ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  7:00 UTC (permalink / raw)
  To: linux-arm-kernel

> > So I'm not still sure about what are advantages of a property under /chosen
> > over "memmap=" kernel parameter.
> > Both are simple and can have the same effect with minimizing changes to dtb.
> > (But if, in the latter case, we have to provide *all* the memory-related information
> > through "memmap=" parameters, it would be much complicated.)
> 
> Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> or uefi-memmap so that we do not need any extra kernel cmdline.
> 
> For x86 we would like to drop the memmap= usage in kexec-tools but we can not
> do that for a compatibility problem about calgary iommu. So that currently
> kexec-tools supports both recreating E820 maps and passing memmap=.
> 
> We should think it carefully because it will be hard to remove once we support it.
> IMO handling it in code is better than using an external interface.
> 

Also, it seems the semantics of memmap=exactmap are different from the current use in the implementation:
exactmap means we need to pass each range separately, including reserved, acpi and other types.
We can not reuse ranges in the uefi memmap for anything other than usable memory.

It will also have the cmdline array size issue.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  7:00                           ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  7:00 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

> > So I'm not still sure about what are advantages of a property under /chosen
> > over "memmap=" kernel parameter.
> > Both are simple and can have the same effect with minimizing changes to dtb.
> > (But if, in the latter case, we have to provide *all* the memory-related information
> > through "memmap=" parameters, it would be much complicated.)
> 
> Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> or uefi-memmap so that we do not need any extra kernel cmdline.
> 
> For x86 we would like to drop the memmap= usage in kexec-tools but we can not
> do that for a compatibility problem about calgary iommu. So that currently
> kexec-tools supports both recreating E820 maps and passing memmap=.
> 
> We should think it carefully because it will be hard to remove once we support it.
> IMO handling it in code is better than using an external interface.
> 

Also, it seems the semantics of memmap=exactmap are different from the current use in the implementation:
exactmap means we need to pass each range separately, including reserved, acpi and other types.
We can not reuse ranges in the uefi memmap for anything other than usable memory.

It will also have the cmdline array size issue.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  7:00                           ` Dave Young
@ 2016-01-20  8:01                             ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  8:01 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/2016 04:00 PM, Dave Young wrote:
>>> So I'm not still sure about what are advantages of a property under /chosen
>>> over "memmap=" kernel parameter.
>>> Both are simple and can have the same effect with minimizing changes to dtb.
>>> (But if, in the latter case, we have to provide *all* the memory-related information
>>> through "memmap=" parameters, it would be much complicated.)
>>
>> Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
>> or uefi-memmap so that we do not need any extra kernel cmdline.

Yes, I understand.
But on arm64, kexec-tools can generate a "memmap=" parameter for crash kernel's
memory region without any user's interaction.
(please note that this parameter eventually goes into dtb's cmdline property in
/chosen.)

In this sense, it is no different from an extra property under /chosen
as kexec-tools can also add it to dtb passed to the crash dump kernel.

(See what I mean?)

>> For x86 we would like to drop the memmap= usage in kexec-tools

I didn't know that :)

>> but we can not
>> do that for a compatibility problem about calgary iommu. So that currently
>> kexec-tools supports both recreating E820 maps and passing memmap=.
>>
>> We should think it carefully because it will be hard to remove once we support it.

Absolutely.

>> IMO handling it in code is better than using an external interface.
>
> Also seems semantic of memmap=exactmap is different than current use in the implementation
> exactmap means we need pass each range seperately including reserved, acpi and other types
> We can not reuse ranges in uefi memmap for other than usable memory.


If necessary, we may use a different name, say, "usablememmap=" for arm64
or just extend "mem=" semantics (allowing XX@YY format) to avoid any confusion.

Thanks,
-Takahiro AKASHI

> It will also have the cmdline array size issue.
 >
> Thanks
> Dave
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  8:01                             ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-20  8:01 UTC (permalink / raw)
  To: Dave Young
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/20/2016 04:00 PM, Dave Young wrote:
>>> So I'm not still sure about what are advantages of a property under /chosen
>>> over "memmap=" kernel parameter.
>>> Both are simple and can have the same effect with minimizing changes to dtb.
>>> (But if, in the latter case, we have to provide *all* the memory-related information
>>> through "memmap=" parameters, it would be much complicated.)
>>
>> Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
>> or uefi-memmap so that we do not need any extra kernel cmdline.

Yes, I understand.
But on arm64, kexec-tools can generate a "memmap=" parameter for crash kernel's
memory region without any user's interaction.
(please note that this parameter eventually goes into dtb's cmdline property in
/chosen.)

In this sense, it is no different from an extra property under /chosen
as kexec-tools can also add it to dtb passed to the crash dump kernel.

(See what I mean?)

>> For x86 we would like to drop the memmap= usage in kexec-tools

I didn't know that :)

>> but we can not
>> do that for a compatibility problem about calgary iommu. So that currently
>> kexec-tools supports both recreating E820 maps and passing memmap=.
>>
>> We should think it carefully because it will be hard to remove once we support it.

Absolutely.

>> IMO handling it in code is better than using an external interface.
>
> Also seems semantic of memmap=exactmap is different than current use in the implementation
> exactmap means we need pass each range seperately including reserved, acpi and other types
> We can not reuse ranges in uefi memmap for other than usable memory.


If necessary, we may use a different name, say, "usablememmap=" for arm64
or just extend "mem=" semantics (allowing XX@YY format) to avoid any confusion.

Thanks,
-Takahiro AKASHI

> It will also have the cmdline array size issue.
 >
> Thanks
> Dave
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  8:01                             ` AKASHI Takahiro
@ 2016-01-20  8:26                               ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  8:26 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/16 at 05:01pm, AKASHI Takahiro wrote:
> On 01/20/2016 04:00 PM, Dave Young wrote:
> >>>So I'm not still sure about what are advantages of a property under /chosen
> >>>over "memmap=" kernel parameter.
> >>>Both are simple and can have the same effect with minimizing changes to dtb.
> >>>(But if, in the latter case, we have to provide *all* the memory-related information
> >>>through "memmap=" parameters, it would be much complicated.)
> >>
> >>Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> >>or uefi-memmap so that we do not need any extra kernel cmdline.
> 
> Yes, I understand.
> But on arm64, kexec-tools can generate a "memmap=" parameter for crash kernel's
> memory region without any user's interaction.
> (please note that this parameter eventually goes into dtb's cmdline property in
> /chosen.)
> 
> In this sense, it is no different from an extra property under /chosen
> as kexec-tools can also add it to dtb passed to the crash dump kernel.
> 
> (See what I mean?)

I think I understand your points, what I would prefer is not an extra property
but modifying uefi memmap or recreating memory nodes for !EFI to be used in kdump kernel.

> 
> >>For x86 we would like to drop the memmap= usage in kexec-tools
> 
> I didn't know that :)
> 
> >>but we can not
> >>do that for a compatibility problem about calgary iommu. So that currently
> >>kexec-tools supports both recreating E820 maps and passing memmap=.
> >>
> >>We should think it carefully because it will be hard to remove once we support it.
> 
> Absolutely.
> 
> >>IMO handling it in code is better than using an external interface.
> >
> >Also seems semantic of memmap=exactmap is different than current use in the implementation
> >exactmap means we need pass each range seperately including reserved, acpi and other types
> >We can not reuse ranges in uefi memmap for other than usable memory.
> 
> 
> If necessary, we may use a different name, say, "usablememmap=" for arm64
> or just extend "mem=" semantics (allowing XX@YY format) to avoid any confusion.

For either of the above, what is the 1st kernel's behavior with these params?

> 
> Thanks,
> -Takahiro AKASHI
> 
> >It will also have the cmdline array size issue.k
> >
> >Thanks
> >Dave
> >

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20  8:26                               ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-20  8:26 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On 01/20/16 at 05:01pm, AKASHI Takahiro wrote:
> On 01/20/2016 04:00 PM, Dave Young wrote:
> >>>So I'm not still sure about what are advantages of a property under /chosen
> >>>over "memmap=" kernel parameter.
> >>>Both are simple and can have the same effect with minimizing changes to dtb.
> >>>(But if, in the latter case, we have to provide *all* the memory-related information
> >>>through "memmap=" parameters, it would be much complicated.)
> >>
> >>Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> >>or uefi-memmap so that we do not need any extra kernel cmdline.
> 
> Yes, I understand.
> But on arm64, kexec-tools can generate a "memmap=" parameter for crash kernel's
> memory region without any user's interaction.
> (please note that this parameter eventually goes into dtb's cmdline property in
> /chosen.)
> 
> In this sense, it is no different from an extra property under /chosen
> as kexec-tools can also add it to dtb passed to the crash dump kernel.
> 
> (See what I mean?)

I think I understand your points, what I would prefer is not an extra property
but modifying uefi memmap or recreating memory nodes for !EFI to be used in kdump kernel.

> 
> >>For x86 we would like to drop the memmap= usage in kexec-tools
> 
> I didn't know that :)
> 
> >>but we can not
> >>do that for a compatibility problem about calgary iommu. So that currently
> >>kexec-tools supports both recreating E820 maps and passing memmap=.
> >>
> >>We should think it carefully because it will be hard to remove once we support it.
> 
> Absolutely.
> 
> >>IMO handling it in code is better than using an external interface.
> >
> >Also seems semantic of memmap=exactmap is different than current use in the implementation
> >exactmap means we need pass each range seperately including reserved, acpi and other types
> >We can not reuse ranges in uefi memmap for other than usable memory.
> 
> 
> If necessary, we may use a different name, say, "usablememmap=" for arm64
> or just extend "mem=" semantics (allowing XX@YY format) to avoid any confusion.

For either of the above, what is the 1st kernel's behavior with these params?

> 
> Thanks,
> -Takahiro AKASHI
> 
> >It will also have the cmdline array size issue.k
> >
> >Thanks
> >Dave
> >

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  2:49                     ` Dave Young
@ 2016-01-20 11:28                       ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 11:28 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 20, 2016 at 10:49:46AM +0800, Dave Young wrote:
> On 01/19/16 at 02:01pm, Mark Rutland wrote:
> > On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> > > On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > > > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > > > >entries.
> > > > > > 
> > > > > > Thanks. I will visit x86 code again.
> > > > > > 
> > > > > > >I think it is X86 specific. Personally I think device tree property is
> > > > > > >better.
> > > > > > 
> > > > > > Do you think so?
> > > > > 
> > > > > I'm not sure it is the best way. For X86 we run into problem with
> > > > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > > > to 2nd kernel we find that cmdline[] array is not big enough.
> > > > 
> > > > I'm not sure how PCI ranges relate to the memory map used for normal
> > > > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > > > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> > > 
> > > Here is the old patch which was rejected in kexec-tools:
> > > http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> > > 
> > > > 
> > > > If the kernel got the rest of its system topology from DT, the PCI
> > > > regions would be described there.
> > > 
> > > Yes, if kdump kernel use same DT as 1st kernel.
> > 
> > Other than for testing purposes, I don't see why you'd pass the kdump
> > kernel a DTB inconsistent with that the 1st kernel was passsed (other
> > than some proerties under /chosen).
> > 
> > We added /sys/firmware/fdt specifically to allow the kexec tools to get
> > the exact DTB the first kernel used. There's no reason for tools to have
> > to make something up.
> 
> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> how one will use it unless dropping the option and use /sys/firmware/fdt
> unconditionally. 

I think this is a tangential discussion. I think it's fine to say that
for kdump we do not expect this -- a user would be shooting themselves
in the foot if they did. Regardless, I was under the impression that
kdump was usually set up by distribution-provided init code.

or kdump, which typically is set up automatically by the OS, 

> If we choose to implement kexec_file_load only in kernel, the interfaces
> provided are kernel, initrd and cmdline. We can always use same dtb.

There are use-cases where being in complete control of the purgatory
code is necessary. For example, the next OS might not be Linux (and
might not accept a DTB, or have different requirements on the initial
register state).

Regardless of the need for something like kexec_file_load for kdump in
Secure Boot environments, there is also a need for kexec_load with the
user having complete control.

> > > > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > > > via UEFI memmap?
> > > > 
> > > > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > > > acquring the relevant information and passing that to the first kernel
> > > > in the DTB (see Documentation/arm/uefi.txt).
> > > 
> > > Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> > > instead of memory nodes details.. 
> > 
> > When booted via EFI, yes.
> > 
> > For NUMA topology in !ACPI kernels, we might need to also retain and
> > parse memory nodes, but only for toplogy information. The kernel would
> > still only use memory as described by the EFI memory map.
> > 
> > There's a horrible edge case I've spotted if performing a chain of
> > cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> > respect the EFI memory map so as to avoid corrupting it for the
> > subsequent LE kernel. Other than this I believe everything should just
> > work.
> 
> Firmware do not know kernel endianniess, kernel should respect firmware
> maps and adapt to it, it sounds like a generic issue not specfic to kexec.

I agree that this isn't kexec's fault as such, but in the absence of
kexec, the above issue does not exist, so one can't consider it in
isolation.

> > > > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > > > UEFI tables in memory are the same, it would be the same as a cold boot.
> > > 
> > > For kexec all memory ranges are same, for kdump we need use original reserved
> > > range with crashkernel= as usable memory and all other orignal usable ranges
> > > are not usable anymore. 
> > 
> > Sure. This is what I believe we should expose with an additional
> > property under /chosen, while keeping everything else pristine.
> > 
> > The crash kernel can then limit itself to that region, while it would
> > have the information of the full memory map (which it could log and/or
> > use to drive other dumping).
> 
> In this way kernel should be aware it is a kdump booting, it is doable though
> I feel it is better for kdump kernel in a black box with infomations it
> can use just like the 1st kernel. Things here is where we choose to cook
> the memory infomation in boot loader or in kernel itself.

Sorry, I can't follow what you are trying to say here. Could you
elaborate?

> > > Is it possible to modify uefi memmap for kdump case?
> > 
> > Technically it would be possible, however I don't think it's necessary,
> > and I think it would be disadvantageous to do so.
> > 
> > Describing the range(s) the crash kernel can use in separate properties
> > under /chosen has a number of advantages.
> 
> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> do not work well in kdump kernel some uncertain reasons. But ideally I
> think kernel should handle things just like in 1st kernel and avoid to use
> it. 

I agree that we should not have kexec/kdump knowledge spread throughout
the kernel, and that the boot protocol should be uniform with a cold
boot as far as possible.

However, requiring userspace or the first kernel to modify
firmware-provided information has a number of risks and reduces the
amount of information available to the kdump kernel. To that end I am
opposed to modifying the memory nodes in the DTB, or to modifying the
EFI memory map.

Having a property in the DTB describing the range(s) of memory reserved
for use by the kdump kernel is vastly simpler, and avoids those risks:

* It requires a tiny amount of self-contained code in the kdump kernel
  to parse the property and apply the constraints imposed (i.e. carve up
  memblock).

  This is easy to contain in a single function (or at least within a
  single file), and need not affect drivers or other code.

* It is uniform regardless of whether the EFI memory map, DT memory
  nodes, or some other mechanism is used to discover memory in the
  systems.

  This makes it easy to impose the restrictions consistently, and is
  somewhat future-proof.

* Userspace or the first kernel does not need to parse and modify an
  arbitrary amount of data (which might be in an extended format it
  doesn't fully understand). There is less risk for this to go wrong.

  It is far easier to add a property than it is to correctly modify the
  EFI memory map, memory nodes, or some other data structure. There is
  less risk, and it is somewhat future-proof.
  
* The original memory map information is preserved, even though unused.
  This may be useful for debugging, and it may turn out that the kdump
  kernel needs to know about certain portions of the original memory
  map, even if we are not currently aware of why we would need this.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 11:28                       ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 11:28 UTC (permalink / raw)
  To: Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On Wed, Jan 20, 2016 at 10:49:46AM +0800, Dave Young wrote:
> On 01/19/16 at 02:01pm, Mark Rutland wrote:
> > On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> > > On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > > > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > > > >entries.
> > > > > > 
> > > > > > Thanks. I will visit x86 code again.
> > > > > > 
> > > > > > >I think it is X86 specific. Personally I think device tree property is
> > > > > > >better.
> > > > > > 
> > > > > > Do you think so?
> > > > > 
> > > > > I'm not sure it is the best way. For X86 we run into problem with
> > > > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > > > to 2nd kernel we find that cmdline[] array is not big enough.
> > > > 
> > > > I'm not sure how PCI ranges relate to the memory map used for normal
> > > > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > > > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> > > 
> > > Here is the old patch which was rejected in kexec-tools:
> > > http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> > > 
> > > > 
> > > > If the kernel got the rest of its system topology from DT, the PCI
> > > > regions would be described there.
> > > 
> > > Yes, if kdump kernel use same DT as 1st kernel.
> > 
> > Other than for testing purposes, I don't see why you'd pass the kdump
> > kernel a DTB inconsistent with that the 1st kernel was passsed (other
> > than some proerties under /chosen).
> > 
> > We added /sys/firmware/fdt specifically to allow the kexec tools to get
> > the exact DTB the first kernel used. There's no reason for tools to have
> > to make something up.
> 
> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> how one will use it unless dropping the option and use /sys/firmware/fdt
> unconditionally. 

I think this is a tangential discussion. I think it's fine to say that
for kdump we do not expect this -- a user would be shooting themselves
in the foot if they did. Regardless, I was under the impression that
kdump was usually set up by distribution-provided init code.

or kdump, which typically is set up automatically by the OS, 

> If we choose to implement kexec_file_load only in kernel, the interfaces
> provided are kernel, initrd and cmdline. We can always use same dtb.

There are use-cases where being in complete control of the purgatory
code is necessary. For example, the next OS might not be Linux (and
might not accept a DTB, or have different requirements on the initial
register state).

Regardless of the need for something like kexec_file_load for kdump in
Secure Boot environments, there is also a need for kexec_load with the
user having complete control.

> > > > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > > > via UEFI memmap?
> > > > 
> > > > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > > > acquring the relevant information and passing that to the first kernel
> > > > in the DTB (see Documentation/arm/uefi.txt).
> > > 
> > > Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> > > instead of memory nodes details.. 
> > 
> > When booted via EFI, yes.
> > 
> > For NUMA topology in !ACPI kernels, we might need to also retain and
> > parse memory nodes, but only for toplogy information. The kernel would
> > still only use memory as described by the EFI memory map.
> > 
> > There's a horrible edge case I've spotted if performing a chain of
> > cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> > respect the EFI memory map so as to avoid corrupting it for the
> > subsequent LE kernel. Other than this I believe everything should just
> > work.
> 
> Firmware do not know kernel endianniess, kernel should respect firmware
> maps and adapt to it, it sounds like a generic issue not specfic to kexec.

I agree that this isn't kexec's fault as such, but in the absence of
kexec, the above issue does not exist, so one can't consider it in
isolation.

> > > > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > > > UEFI tables in memory are the same, it would be the same as a cold boot.
> > > 
> > > For kexec all memory ranges are same, for kdump we need use original reserved
> > > range with crashkernel= as usable memory and all other orignal usable ranges
> > > are not usable anymore. 
> > 
> > Sure. This is what I believe we should expose with an additional
> > property under /chosen, while keeping everything else pristine.
> > 
> > The crash kernel can then limit itself to that region, while it would
> > have the information of the full memory map (which it could log and/or
> > use to drive other dumping).
> 
> In this way kernel should be aware it is a kdump booting, it is doable though
> I feel it is better for kdump kernel in a black box with infomations it
> can use just like the 1st kernel. Things here is where we choose to cook
> the memory infomation in boot loader or in kernel itself.

Sorry, I can't follow what you are trying to say here. Could you
elaborate?

> > > Is it possible to modify uefi memmap for kdump case?
> > 
> > Technically it would be possible, however I don't think it's necessary,
> > and I think it would be disadvantageous to do so.
> > 
> > Describing the range(s) the crash kernel can use in separate properties
> > under /chosen has a number of advantages.
> 
> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> do not work well in kdump kernel some uncertain reasons. But ideally I
> think kernel should handle things just like in 1st kernel and avoid to use
> it. 

I agree that we should not have kexec/kdump knowledge spread throughout
the kernel, and that the boot protocol should be uniform with a cold
boot as far as possible.

However, requiring userspace or the first kernel to modify
firmware-provided information has a number of risks and reduces the
amount of information available to the kdump kernel. To that end I am
opposed to modifying the memory nodes in the DTB, or to modifying the
EFI memory map.

Having a property in the DTB describing the range(s) of memory reserved
for use by the kdump kernel is vastly simpler, and avoids those risks:

* It requires a tiny amount of self-contained code in the kdump kernel
  to parse the property and apply the constraints imposed (i.e. carve up
  memblock).

  This is easy to contain in a single function (or at least within a
  single file), and need not affect drivers or other code.

* It is uniform regardless of whether the EFI memory map, DT memory
  nodes, or some other mechanism is used to discover memory in the
  systems.

  This makes it easy to impose the restrictions consistently, and is
  somewhat future-proof.

* Userspace or the first kernel do not need to parse and modify an
  arbitrary amount of data (which might be in an extended format it
  doesn't fully understand). There is less risk for this to go wrong.

  It is far easier to add a property than it is to correctly modify the
  EFI memory map, memory nodes, or some other data structure. There is
  less risk, and it is somewhat future-proof.
  
* The original memory map information is preserved, even though unused.
  This may be useful for debugging, and it may turn out that the kdump
  kernel needs to know about certain portions of the original memory
  map, even if we are not currently aware of why we would need this.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  6:07                       ` AKASHI Takahiro
@ 2016-01-20 11:49                         ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 11:49 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
> On 01/20/2016 11:49 AM, Dave Young wrote:
> >On 01/19/16 at 02:01pm, Mark Rutland wrote:
> >>On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> >>>On 01/19/16 at 12:51pm, Mark Rutland wrote:
> >>>>On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> >>>>>On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> >>>>>>On 01/19/2016 10:43 AM, Dave Young wrote:
> >>>>>>>X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >>>>>>>recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >>>>>>>are over E820 limitation then turn to use setup_data list for remain
> >>>>>>>entries.
> >>>>>>
> >>>>>>Thanks. I will visit x86 code again.
> >>>>>>
> >>>>>>>I think it is X86 specific. Personally I think device tree property is
> >>>>>>>better.
> >>>>>>
> >>>>>>Do you think so?
> >>>>>
> >>>>>I'm not sure it is the best way. For X86 we run into problem with
> >>>>>memmap= design, one example is pci domain X (X>1) need the pci memory
> >>>>>ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> >>>>>to 2nd kernel we find that cmdline[] array is not big enough.
> >>>>
> >>>>I'm not sure how PCI ranges relate to the memory map used for normal
> >>>>memory (i.e. RAM), though I'm probably missing some caveat with the way
> >>>>ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> >>>
> >>>Here is the old patch which was rejected in kexec-tools:
> >>>http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> >>>
> >>>>
> >>>>If the kernel got the rest of its system topology from DT, the PCI
> >>>>regions would be described there.
> >>>
> >>>Yes, if kdump kernel use same DT as 1st kernel.
> >>
> >>Other than for testing purposes, I don't see why you'd pass the kdump
> >>kernel a DTB inconsistent with that the 1st kernel was passsed (other
> >>than some proerties under /chosen).
> >>
> >>We added /sys/firmware/fdt specifically to allow the kexec tools to get
> >>the exact DTB the first kernel used. There's no reason for tools to have
> >>to make something up.
> >
> >Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> >how one will use it unless dropping the option and use /sys/firmware/fdt
> >unconditionally.
> 
> As a matter of fact, specifying proper command line parameters as well as
> dtb is partly users' responsibility for kdump to work correctly.
> (especially for BE kernel)
> 
> >If we choose to implement kexec_file_load only in kernel, the interfaces
> >provided are kernel, initrd and cmdline. We can always use same dtb.
> 
> I would say that we can always use the same dtb even with kexec_load
> from user's perspective. Right?

No.

This breaks using kexec for boot-loader purposes, and imposes a policy.

For better or worse kexec_file_load has always imposed a constrained
Linux-only policy, so that's a different story.

> >>There's a horrible edge case I've spotted if performing a chain of
> >>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>respect the EFI memory map so as to avoid corrupting it for the
> >>subsequent LE kernel. Other than this I believe everything should just
> >>work.
> >
> >Firmware do not know kernel endianniess, kernel should respect firmware
> >maps and adapt to it, it sounds like a generic issue not specfic to kexec.
> 
> On arm64, a kernel image header has a bit field to specify the image's endianness.
> Anyway, our current implementation replies on a user-supplied dtb to start BE kernel.

The firmware should _never_ care about the kernel's endianness. The
bootloader or first kernel shouldn't care about the next kernel's
endianness apart from in exceptional circumstances. The DTB for a LE
kernel should look identical to that passed to a BE kernel.

In my mind, the only valid reason to look at that bit is so that
bootloaders can provide a warning if the CPU does not implement that
endianness.

The issue I mention above should be solved by changes to the BE kernel.

> >>>Is it possible to modify uefi memmap for kdump case?
> >>
> >>Technically it would be possible, however I don't think it's necessary,
> >>and I think it would be disadvantageous to do so.
> >>
> >>Describing the range(s) the crash kernel can use in separate properties
> >>under /chosen has a number of advantages.
> >
> >Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> >elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> >do not work well in kdump kernel some uncertain reasons. But ideally I
> >think kernel should handle things just like in 1st kernel and avoid to use
> >it.
> 
> So I'm not still sure about what are advantages of a property under /chosen
> over "memmap=" kernel parameter.
> Both are simple and can have the same effect with minimizing changes to dtb.
> (But if, in the latter case, we have to provide *all* the memory-related information
> through "memmap=" parameters, it would be much complicated.)

The reasons I prefer a property over command line additions include:

* It keeps the command line simple (as you mention the opposite is
  "complicated").

* It is logically separate from options the user may pass to the kernel
  in that the restricted region(s) of memory available are effectively
  properties of the system (in that the crashed OS is part of the system
  state).

* The semantics of the command line parsing can change subtly over time
  (for example, see 51e158c12aca3c9a, which terminates command line
  parsing at "--"). Making sure that a command line option will
  actually be parsed by the next kernel is not trivial.
  
  Keeping this information isolated from the command line is more
  robust.

* Addition of a property is a self-contained operation, that doesn't
  require any knowledge about the command line.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 11:49                         ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 11:49 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
> On 01/20/2016 11:49 AM, Dave Young wrote:
> >On 01/19/16 at 02:01pm, Mark Rutland wrote:
> >>On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> >>>On 01/19/16 at 12:51pm, Mark Rutland wrote:
> >>>>On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> >>>>>On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> >>>>>>On 01/19/2016 10:43 AM, Dave Young wrote:
> >>>>>>>X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >>>>>>>recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >>>>>>>are over E820 limitation then turn to use setup_data list for remain
> >>>>>>>entries.
> >>>>>>
> >>>>>>Thanks. I will visit x86 code again.
> >>>>>>
> >>>>>>>I think it is X86 specific. Personally I think device tree property is
> >>>>>>>better.
> >>>>>>
> >>>>>>Do you think so?
> >>>>>
> >>>>>I'm not sure it is the best way. For X86 we run into problem with
> >>>>>memmap= design, one example is pci domain X (X>1) need the pci memory
> >>>>>ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> >>>>>to 2nd kernel we find that cmdline[] array is not big enough.
> >>>>
> >>>>I'm not sure how PCI ranges relate to the memory map used for normal
> >>>>memory (i.e. RAM), though I'm probably missing some caveat with the way
> >>>>ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> >>>
> >>>Here is the old patch which was rejected in kexec-tools:
> >>>http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> >>>
> >>>>
> >>>>If the kernel got the rest of its system topology from DT, the PCI
> >>>>regions would be described there.
> >>>
> >>>Yes, if kdump kernel use same DT as 1st kernel.
> >>
> >>Other than for testing purposes, I don't see why you'd pass the kdump
> >>kernel a DTB inconsistent with that the 1st kernel was passsed (other
> >>than some proerties under /chosen).
> >>
> >>We added /sys/firmware/fdt specifically to allow the kexec tools to get
> >>the exact DTB the first kernel used. There's no reason for tools to have
> >>to make something up.
> >
> >Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> >how one will use it unless dropping the option and use /sys/firmware/fdt
> >unconditionally.
> 
> As a matter of fact, specifying proper command line parameters as well as
> dtb is partly users' responsibility for kdump to work correctly.
> (especially for BE kernel)
> 
> >If we choose to implement kexec_file_load only in kernel, the interfaces
> >provided are kernel, initrd and cmdline. We can always use same dtb.
> 
> I would say that we can always use the same dtb even with kexec_load
> from user's perspective. Right?

No.

This breaks using kexec for boot-loader purposes, and imposes a policy.

For better or worse kexec_file_load has always imposed a constrained
Linux-only policy, so that's a different story.

> >>There's a horrible edge case I've spotted if performing a chain of
> >>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>respect the EFI memory map so as to avoid corrupting it for the
> >>subsequent LE kernel. Other than this I believe everything should just
> >>work.
> >
> >Firmware do not know kernel endianniess, kernel should respect firmware
> >maps and adapt to it, it sounds like a generic issue not specfic to kexec.
> 
> On arm64, a kernel image header has a bit field to specify the image's endianness.
> Anyway, our current implementation replies on a user-supplied dtb to start BE kernel.

The firmware should _never_ care about the kernel's endianness. The
bootloader or first kernel shouldn't care about the next kernel's
endianness apart from in exceptional circumstances. The DTB for a LE
kernel should look identical to that passed to a BE kernel.

In my mind, the only valid reason to look at that bit is so that
bootloaders can provide a warning if the CPU does not implement that
endianness.

The issue I mention above should be solved by changes to the BE kernel.

> >>>Is it possible to modify uefi memmap for kdump case?
> >>
> >>Technically it would be possible, however I don't think it's necessary,
> >>and I think it would be disadvantageous to do so.
> >>
> >>Describing the range(s) the crash kernel can use in separate properties
> >>under /chosen has a number of advantages.
> >
> >Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> >elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> >do not work well in kdump kernel some uncertain reasons. But ideally I
> >think kernel should handle things just like in 1st kernel and avoid to use
> >it.
> 
> So I'm not still sure about what are advantages of a property under /chosen
> over "memmap=" kernel parameter.
> Both are simple and can have the same effect with minimizing changes to dtb.
> (But if, in the latter case, we have to provide *all* the memory-related information
> through "memmap=" parameters, it would be much complicated.)

The reasons I prefer a property over command line additions include:

* It keeps the command line simple (as you mention the opposite is
  "complicated").

* It is logically separate from options the user may pass to the kernel
  in that the restricted region(s) of memory available are effectively
  properties of the system (in that the crashed OS is part of the system
  state).

* The semantics of the command line parsing can change subtly over time
  (for example, see 51e158c12aca3c9a, which terminates command line
  parsing at "--"). Making sure that a command line option will
  actually be parsed by the next kernel is not trivial.
  
  Keeping this information isolated from the command line is more
  robust.

* Addition of a property is a self-contained operation, that doesn't
  require any knowledge about the command line.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  6:38                         ` Dave Young
@ 2016-01-20 11:54                           ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 11:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 20, 2016 at 02:38:56PM +0800, Dave Young wrote:
> Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> or uefi-memmap so that we do not need any extra kernel cmdline.

I am strongly opposed to modifying the FW-provided memory map
information, for the reasons I expressed in other replies.

What are your concerns with a property under /chosen?

> For x86 we would like to drop the memmap= usage in kexec-tools but we can not
> do that for a compatibility problem about calgary iommu. So that currently
> kexec-tools supports both recreating E820 maps and passing memmap=.
> 
> We should think it carefully because it will be hard to remove once we support it.

I agree that we don't want a plethora of solutions that we have to
support forever.

> IMO handling it in code is better than using an external interface.

I'm not sure what you mean by this. What is the "external interface",
and which code do you believe it is better to handle this in?

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 11:54                           ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 11:54 UTC (permalink / raw)
  To: Dave Young
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On Wed, Jan 20, 2016 at 02:38:56PM +0800, Dave Young wrote:
> Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> or uefi-memmap so that we do not need any extra kernel cmdline.

I am strongly opposed to modifying the FW-provided memory map
information, for the reasons I expressed in other replies.

What are your concerns with a property under /chosen?

> For x86 we would like to drop the memmap= usage in kexec-tools but we can not
> do that for a compatibility problem about calgary iommu. So that currently
> kexec-tools supports both recreating E820 maps and passing memmap=.
> 
> We should think it carefully because it will be hard to remove once we support it.

I agree that we don't want a plethora of solutions that we have to
support forever.

> IMO handling it in code is better than using an external interface.

I'm not sure what you mean by this. What is the "external interface",
and which code do you believe it is better to handle this in?

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20  5:25                     ` AKASHI Takahiro
@ 2016-01-20 12:02                       ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 12:02 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> On 01/19/2016 11:01 PM, Mark Rutland wrote:
> >For NUMA topology in !ACPI kernels, we might need to also retain and
> >parse memory nodes, but only for toplogy information. The kernel would
> >still only use memory as described by the EFI memory map.
> >
> >There's a horrible edge case I've spotted if performing a chain of
> >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >respect the EFI memory map so as to avoid corrupting it for the
> >subsequent LE kernel. Other than this I believe everything should just
> >work.
> 
> BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> (as in the case of LE -> LE) and require users to provide a dtb file explicitly.

As I mentioned above, the problem exists when memory nodes also exist
(for describing NUMA topology). In that case the BE kernel would try to
use the information from the memory nodes.

> For BE -> LE, BE kernel doesn't know wther UEFI memmap table is available or not
> and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)

See above. The problem I imagine is:

LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes

    v       kexec

BE kernel - uses DT memory nodes
          - clobbers EFI runtime regions as it sees them as available

    v       kexec

LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
          - tries to call EFI runtime services, and explodes.

> >>>A kexec'd kernel should simply inherit that. So long as the DTB and/or
> >>>UEFI tables in memory are the same, it would be the same as a cold boot.
> >>
> >>For kexec all memory ranges are same, for kdump we need use original reserved
> >>range with crashkernel= as usable memory and all other orignal usable ranges
> >>are not usable anymore.
> >
> >Sure. This is what I believe we should expose with an additional
> >property under /chosen, while keeping everything else pristine.
> >
> >The crash kernel can then limit itself to that region, while it would
> >have the information of the full memory map (which it could log and/or
> >use to drive other dumping).
> 
> FYI,
> all the original usable memory regions used by the 1st kernel are also
> described in an ELF core header specified by "elfcorehdr=" parameter to
> the crash dump kernel.

That only describes what the first kernel parsed and thus believed, not
exactly what the firmware described.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 12:02                       ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 12:02 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> On 01/19/2016 11:01 PM, Mark Rutland wrote:
> >For NUMA topology in !ACPI kernels, we might need to also retain and
> >parse memory nodes, but only for toplogy information. The kernel would
> >still only use memory as described by the EFI memory map.
> >
> >There's a horrible edge case I've spotted if performing a chain of
> >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >respect the EFI memory map so as to avoid corrupting it for the
> >subsequent LE kernel. Other than this I believe everything should just
> >work.
> 
> BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> (as in the case of LE -> LE) and require users to provide a dtb file explicitly.

As I mentioned above, the problem exists when memory nodes also exist
(for describing NUMA topology). In that case the BE kernel would try to
use the information from the memory nodes.

> For BE -> LE, BE kernel doesn't know wther UEFI memmap table is available or not
> and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)

See above. The problem I imagine is:

LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes

    v       kexec

BE kernel - uses DT memory nodes
          - clobbers EFI runtime regions as it sees them as available

    v       kexec

LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
          - tries to call EFI runtime services, and explodes.

> >>>A kexec'd kernel should simply inherit that. So long as the DTB and/or
> >>>UEFI tables in memory are the same, it would be the same as a cold boot.
> >>
> >>For kexec all memory ranges are same, for kdump we need use original reserved
> >>range with crashkernel= as usable memory and all other orignal usable ranges
> >>are not usable anymore.
> >
> >Sure. This is what I believe we should expose with an additional
> >property under /chosen, while keeping everything else pristine.
> >
> >The crash kernel can then limit itself to that region, while it would
> >have the information of the full memory map (which it could log and/or
> >use to drive other dumping).
> 
> FYI,
> all the original usable memory regions used by the 1st kernel are also
> described in an ELF core header specified by "elfcorehdr=" parameter to
> the crash dump kernel.

That only describes what the first kernel parsed and thus believed, not
exactly what the firmware described.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 12:02                       ` Mark Rutland
@ 2016-01-20 12:36                         ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 12:36 UTC (permalink / raw)
  To: linux-arm-kernel

Ard, Ganapatrao, the below is something we need to consider for the
combination of the NUMA & kexec approaches. It only becomes a problem
if/when we preserve DT memory nodes in the presence of EFI, though it
would be nice to not box ourselves into a corner.

On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> > On 01/19/2016 11:01 PM, Mark Rutland wrote:
> > >For NUMA topology in !ACPI kernels, we might need to also retain and
> > >parse memory nodes, but only for topology information. The kernel would
> > >still only use memory as described by the EFI memory map.
> > >
> > >There's a horrible edge case I've spotted if performing a chain of
> > >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> > >respect the EFI memory map so as to avoid corrupting it for the
> > >subsequent LE kernel. Other than this I believe everything should just
> > >work.
> > 
> > BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> > for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> > (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
> 
> As I mentioned above, the problem exists when memory nodes also exist
> (for describing NUMA topology). In that case the BE kernel would try to
> use the information from the memory nodes.
> 
> > For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
> > and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
> 
> See above. The problem I imagine is:
> 
> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> 
>     v       kexec
> 
> BE kernel - uses DT memory nodes
>           - clobbers EFI runtime regions as it sees them as available
> 
>     v       kexec
> 
> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>           - tries to call EFI runtime services, and explodes.

I'm not really sure what the best approach is here, but I thought that
it would be good to raise awareness of the edge-case.

Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 12:36                         ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 12:36 UTC (permalink / raw)
  To: AKASHI Takahiro, Ard Biesheuvel, Ganapatrao Kulkarni
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, marc.zyngier,
	James Morse, linux-arm-kernel, Dave Young, kexec,
	christoffer.dall

Ard, Ganapatrao, the below is something we need to consider for the
combination of the NUMA & kexec approaches. It only becomes a problem
if/when we preserve DT memory nodes in the presence of EFI, though it
would be nice to not box ourselves into a corner.

On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> > On 01/19/2016 11:01 PM, Mark Rutland wrote:
> > >For NUMA topology in !ACPI kernels, we might need to also retain and
> > >parse memory nodes, but only for topology information. The kernel would
> > >still only use memory as described by the EFI memory map.
> > >
> > >There's a horrible edge case I've spotted if performing a chain of
> > >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> > >respect the EFI memory map so as to avoid corrupting it for the
> > >subsequent LE kernel. Other than this I believe everything should just
> > >work.
> > 
> > BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> > for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> > (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
> 
> As I mentioned above, the problem exists when memory nodes also exist
> (for describing NUMA topology). In that case the BE kernel would try to
> use the information from the memory nodes.
> 
> > For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
> > and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
> 
> See above. The problem I imagine is:
> 
> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> 
>     v       kexec
> 
> BE kernel - uses DT memory nodes
>           - clobbers EFI runtime regions as it sees them as available
> 
>     v       kexec
> 
> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>           - tries to call EFI runtime services, and explodes.

I'm not really sure what the best approach is here, but I thought that
it would be good to raise awareness of the edge-case.

Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 03/19] arm64: Add new asm macro copy_page
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-20 14:01     ` James Morse
  -1 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-20 14:01 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On 15/01/16 19:18, Geoff Levand wrote:

Can I donate a commit message:

Kexec and hibernate need to copy pages of memory, but may not have all
of the kernel mapped, and are unable to call copy_page().

Convert copy_page() to a macro, so that it can be inlined in these
situations.


> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/assembler.h | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 21979a4..c47a623 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -24,6 +24,7 @@
>  #define __ASM_ASSEMBLER_H
>  
>  #include <asm/asm-offsets.h>
> +#include <asm/page.h>
>  #include <asm/pgtable-hwdef.h>
>  #include <asm/ptrace.h>
>  #include <asm/thread_info.h>
> @@ -240,6 +241,24 @@ lr	.req	x30		// link register
>  	.endm
>  
>  /*
> + * copy_page - copy src to dest using temp registers t1-t8
> + */
> +	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
> +1:	ldp	\t1, \t2, [\src]

Using '1' as a label here means callers can't jump back over it using
the same label. The convention with USER() and alternatives is to use
unlikely values like '9999'.


> +	ldp	\t3, \t4, [\src, #16]
> +	ldp	\t5, \t6, [\src, #32]
> +	ldp	\t7, \t8, [\src, #48]
> +	add	\src, \src, #64
> +	stnp	\t1, \t2, [\dest]
> +	stnp	\t3, \t4, [\dest, #16]
> +	stnp	\t5, \t6, [\dest, #32]
> +	stnp	\t7, \t8, [\dest, #48]
> +	add	\dest, \dest, #64
> +	tst	\src, #(PAGE_SIZE - 1)
> +	b.ne	1b
> +	.endm

It may be worth updating lib/copy_page.S to use this macro.


I will fix the code in hibernate to use this!

Thanks,


James

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 03/19] arm64: Add new asm macro copy_page
@ 2016-01-20 14:01     ` James Morse
  0 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-20 14:01 UTC (permalink / raw)
  To: Geoff Levand
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, linux-arm-kernel, kexec, christoffer.dall

Hi Geoff,

On 15/01/16 19:18, Geoff Levand wrote:

Can I donate a commit message:

Kexec and hibernate need to copy pages of memory, but may not have all
of the kernel mapped, and are unable to call copy_page().

Convert copy_page() to a macro, so that it can be inlined in these
situations.


> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/include/asm/assembler.h | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 21979a4..c47a623 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -24,6 +24,7 @@
>  #define __ASM_ASSEMBLER_H
>  
>  #include <asm/asm-offsets.h>
> +#include <asm/page.h>
>  #include <asm/pgtable-hwdef.h>
>  #include <asm/ptrace.h>
>  #include <asm/thread_info.h>
> @@ -240,6 +241,24 @@ lr	.req	x30		// link register
>  	.endm
>  
>  /*
> + * copy_page - copy src to dest using temp registers t1-t8
> + */
> +	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
> +1:	ldp	\t1, \t2, [\src]

Using '1' as a label here means callers can't jump back over it using
the same label. The convention with USER() and alternatives is to use
unlikely values like '9999'.


> +	ldp	\t3, \t4, [\src, #16]
> +	ldp	\t5, \t6, [\src, #32]
> +	ldp	\t7, \t8, [\src, #48]
> +	add	\src, \src, #64
> +	stnp	\t1, \t2, [\dest]
> +	stnp	\t3, \t4, [\dest, #16]
> +	stnp	\t5, \t6, [\dest, #32]
> +	stnp	\t7, \t8, [\dest, #48]
> +	add	\dest, \dest, #64
> +	tst	\src, #(PAGE_SIZE - 1)
> +	b.ne	1b
> +	.endm

It may be worth updating lib/copy_page.S to use this macro.


I will fix the code in hibernate to use this!

Thanks,


James





_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 12:36                         ` Mark Rutland
@ 2016-01-20 14:59                           ` Ard Biesheuvel
  -1 siblings, 0 replies; 174+ messages in thread
From: Ard Biesheuvel @ 2016-01-20 14:59 UTC (permalink / raw)
  To: linux-arm-kernel

On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
> Ard, Ganapatrao, the below is something we need to consider for the
> combination of the NUMA & kexec approaches. It only becomes a problem
> if/when we preserve DT memory nodes in the presence of EFI, though it
> would be nice to not box ourselves into a corner.
>
> On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
>> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
>> > On 01/19/2016 11:01 PM, Mark Rutland wrote:
>> > >For NUMA topology in !ACPI kernels, we might need to also retain and
>> > >parse memory nodes, but only for topology information. The kernel would
>> > >still only use memory as described by the EFI memory map.
>> > >
>> > >There's a horrible edge case I've spotted if performing a chain of
>> > >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>> > >respect the EFI memory map so as to avoid corrupting it for the
>> > >subsequent LE kernel. Other than this I believe everything should just
>> > >work.
>> >
>> > BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
>> > for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
>> > (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
>>
>> As I mentioned above, the problem exists when memory nodes also exist
>> (for describing NUMA topology). In that case the BE kernel would try to
>> use the information from the memory nodes.
>>
>> > For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
>> > and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
>>
>> See above. The problem I imagine is:
>>
>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>
>>     v       kexec
>>
>> BE kernel - uses DT memory nodes
>>           - clobbers EFI runtime regions as it sees them as available
>>
>>     v       kexec
>>
>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>           - tries to call EFI runtime services, and explodes.
>
> I'm not really sure what the best approach is here, but I thought that
> it would be good to raise awareness of the edge-case.
>

I think we should simply allow the BE kernel to deal with a UEFI
memory map. It only involves a bit of byte swapping (which I already
implemented at some point).

It would require some minor refactoring to make the UEFI init code
separate from all the other bits, but I don't see any major issues
here.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 14:59                           ` Ard Biesheuvel
  0 siblings, 0 replies; 174+ messages in thread
From: Ard Biesheuvel @ 2016-01-20 14:59 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, AKASHI Takahiro,
	James Morse, linux-arm-kernel, Marc Zyngier, Ganapatrao Kulkarni,
	Dave Young, kexec, Christoffer Dall

On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
> Ard, Ganapatrao, the below is something we need to consider for the
> combination of the NUMA & kexec approaches. It only becomes a problem
> if/when we preserve DT memory nodes in the presence of EFI, though it
> would be nice to not box ourselves into a corner.
>
> On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
>> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
>> > On 01/19/2016 11:01 PM, Mark Rutland wrote:
>> > >For NUMA topology in !ACPI kernels, we might need to also retain and
>> > >parse memory nodes, but only for topology information. The kernel would
>> > >still only use memory as described by the EFI memory map.
>> > >
>> > >There's a horrible edge case I've spotted if performing a chain of
>> > >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>> > >respect the EFI memory map so as to avoid corrupting it for the
>> > >subsequent LE kernel. Other than this I believe everything should just
>> > >work.
>> >
>> > BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
>> > for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
>> > (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
>>
>> As I mentioned above, the problem exists when memory nodes also exist
>> (for describing NUMA topology). In that case the BE kernel would try to
>> use the information from the memory nodes.
>>
>> > For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
>> > and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
>>
>> See above. The problem I imagine is:
>>
>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>
>>     v       kexec
>>
>> BE kernel - uses DT memory nodes
>>           - clobbers EFI runtime regions as it sees them as available
>>
>>     v       kexec
>>
>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>           - tries to call EFI runtime services, and explodes.
>
> I'm not really sure what the best approach is here, but I thought that
> it would be good to raise awareness of the edge-case.
>

I think we should simply allow the BE kernel to deal with a UEFI
memory map. It only involves a bit of byte swapping (which I already
implemented at some point).

It would require some minor refactoring to make the UEFI init code
separate from all the other bits, but I don't see any major issues
here.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 14:59                           ` Ard Biesheuvel
@ 2016-01-20 15:04                             ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 15:04 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 20, 2016 at 03:59:08PM +0100, Ard Biesheuvel wrote:
> On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
> > Ard, Ganapatrao, the below is something we need to consider for the
> > combination of the NUMA & kexec approaches. It only becomes a problem
> > if/when we preserve DT memory nodes in the presence of EFI, though it
> > would be nice to not box ourselves into a corner.
> >
> > On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
> >> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> >> > On 01/19/2016 11:01 PM, Mark Rutland wrote:
> >> > >For NUMA topology in !ACPI kernels, we might need to also retain and
> >> > >parse memory nodes, but only for topology information. The kernel would
> >> > >still only use memory as described by the EFI memory map.
> >> > >
> >> > >There's a horrible edge case I've spotted if performing a chain of
> >> > >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >> > >respect the EFI memory map so as to avoid corrupting it for the
> >> > >subsequent LE kernel. Other than this I believe everything should just
> >> > >work.
> >> >
> >> > BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> >> > for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> >> > (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
> >>
> >> As I mentioned above, the problem exists when memory nodes also exist
> >> (for describing NUMA topology). In that case the BE kernel would try to
> >> use the information from the memory nodes.
> >>
> >> > For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
> >> > and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
> >>
> >> See above. The problem I imagine is:
> >>
> >> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>
> >>     v       kexec
> >>
> >> BE kernel - uses DT memory nodes
> >>           - clobbers EFI runtime regions as it sees them as available
> >>
> >>     v       kexec
> >>
> >> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>           - tries to call EFI runtime services, and explodes.
> >
> > I'm not really sure what the best approach is here, but I thought that
> > it would be good to raise awareness of the edge-case.
> >
> 
> I think we should simply allow the BE kernel to deal with a UEFI
> memory map. It only involves a bit of byte swapping (which I already
> implemented at some point)
> 
> It would require some minor refactoring to make the UEFI init code
> separate from all the other bits, but I don't see any major issues
> here

Ok. I had assumed that getting the BE kernel to deal with the UEFI
memory map would be a bit more involved.

I'm happy to be proven wrong. :)

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-20 15:04                             ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-20 15:04 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, AKASHI Takahiro,
	James Morse, linux-arm-kernel, Marc Zyngier, Ganapatrao Kulkarni,
	Dave Young, kexec, Christoffer Dall

On Wed, Jan 20, 2016 at 03:59:08PM +0100, Ard Biesheuvel wrote:
> On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
> > Ard, Ganapatrao, the below is something we need to consider for the
> > combination of the NUMA & kexec approaches. It only becomes a problem
> > if/when we preserve DT memory nodes in the presence of EFI, though it
> > would be nice to not box ourselves into a corner.
> >
> > On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
> >> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> >> > On 01/19/2016 11:01 PM, Mark Rutland wrote:
> >> > >For NUMA topology in !ACPI kernels, we might need to also retain and
> >> > >parse memory nodes, but only for topology information. The kernel would
> >> > >still only use memory as described by the EFI memory map.
> >> > >
> >> > >There's a horrible edge case I've spotted if performing a chain of
> >> > >cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >> > >respect the EFI memory map so as to avoid corrupting it for the
> >> > >subsequent LE kernel. Other than this I believe everything should just
> >> > >work.
> >> >
> >> > BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> >> > for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> >> > (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
> >>
> >> As I mentioned above, the problem exists when memory nodes also exist
> >> (for describing NUMA topology). In that case the BE kernel would try to
> >> use the information from the memory nodes.
> >>
> >> > For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
> >> > and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
> >>
> >> See above. The problem I imagine is:
> >>
> >> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>
> >>     v       kexec
> >>
> >> BE kernel - uses DT memory nodes
> >>           - clobbers EFI runtime regions as it sees them as available
> >>
> >>     v       kexec
> >>
> >> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>           - tries to call EFI runtime services, and explodes.
> >
> > I'm not really sure what the best approach is here, but I thought that
> > it would be good to raise awareness of the edge-case.
> >
> 
> I think we should simply allow the BE kernel to deal with a UEFI
> memory map. It only involves a bit of byte swapping (which I already
> implemented at some point)
> 
> It would require some minor refactoring to make the UEFI init code
> separate from all the other bits, but I don't see any major issues
> here

Ok. I had assumed that getting the BE kernel to deal with the UEFI
memory map would be a bit more involved.

I'm happy to be proven wrong. :)

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-01-20  2:56       ` Dave Young
@ 2016-01-20 21:15         ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-20 21:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2016-01-20 at 10:56 +0800, Dave Young wrote:
> On 01/19/16 at 04:15pm, Geoff Levand wrote:
> > On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> > > Geoff, another question about kexec-tools part is, can the kexec
> > > -tools code
> > > been written in kernel? We have the infrastructure for
> > > kexec_file_load.
> > 
> > I see no technical reason why the arm64 kernel cannot support
> > kexec_file_load.
> 
> Cool, care to port it to kernel so that we have kexec_file_load only
> in arm64
> we do not need to support both kexec_load and kexec_file_load?
> 

I have no plans to do so.  Feel free to submit a patch.

-Geoff

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-20 21:15         ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-20 21:15 UTC (permalink / raw)
  To: Dave Young
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

On Wed, 2016-01-20 at 10:56 +0800, Dave Young wrote:
> On 01/19/16 at 04:15pm, Geoff Levand wrote:
> > On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> > > Geoff, another question about kexec-tools part is, can the kexec
> > > -tools code
> > > been written in kernel? We have the infrastructure for
> > > kexec_file_load.
> > 
> > I see no technical reason why the arm64 kernel cannot support
> > kexec_file_load.
> 
> Cool, care to port it to kernel so that we have kexec_file_load only
> in arm64
> we do not need to support both kexec_load and kexec_file_load?
> 

I have no plans to do so.  Feel free to submit a patch.

-Geoff

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 09/19] Revert "arm64: remove dead code"
  2016-01-15 19:55     ` Mark Rutland
@ 2016-01-20 21:18       ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-20 21:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2016-01-15 at 19:55 +0000, Mark Rutland wrote:
> On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> > This reverts commit b08d4640a3dca68670fc5af2fe9205b395a02388.
> > 
> > Add back the setup_mm_for_reboot() needed for kexec.
> 
> My pagetable rework series [1,2] adds cpu_install_idmap() [3], which
> supersedes setup_mm_for_reboot, and differs only in name.

OK, I'll switch to cpu_install_idmap.

-Geoff

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 09/19] Revert "arm64: remove dead code"
@ 2016-01-20 21:18       ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-01-20 21:18 UTC (permalink / raw)
  To: Mark Rutland
  Cc: marc.zyngier, Catalin Marinas, Will Deacon, AKASHI Takahiro,
	James Morse, linux-arm-kernel, kexec, christoffer.dall

On Fri, 2016-01-15 at 19:55 +0000, Mark Rutland wrote:
> On Fri, Jan 15, 2016 at 07:18:37PM +0000, Geoff Levand wrote:
> > This reverts commit b08d4640a3dca68670fc5af2fe9205b395a02388.
> > 
> > Add back the setup_mm_for_reboot() needed for kexec.
> 
> My pagetable rework series [1,2] adds cpu_install_idmap() [3], which
> supersedes setup_mm_for_reboot, and differs only in name.

OK, I'll switch to cpu_install_idmap.

-Geoff

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 11:28                       ` Mark Rutland
@ 2016-01-21  2:54                         ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-21  2:54 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/16 at 11:28am, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 10:49:46AM +0800, Dave Young wrote:
> > On 01/19/16 at 02:01pm, Mark Rutland wrote:
> > > On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> > > > On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > > > > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > > > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > > > > >entries.
> > > > > > > 
> > > > > > > Thanks. I will visit x86 code again.
> > > > > > > 
> > > > > > > >I think it is X86 specific. Personally I think device tree property is
> > > > > > > >better.
> > > > > > > 
> > > > > > > Do you think so?
> > > > > > 
> > > > > > I'm not sure it is the best way. For X86 we run into problem with
> > > > > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > > > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > > > > to 2nd kernel we find that cmdline[] array is not big enough.
> > > > > 
> > > > > I'm not sure how PCI ranges relate to the memory map used for normal
> > > > > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > > > > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> > > > 
> > > > Here is the old patch which was rejected in kexec-tools:
> > > > http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> > > > 
> > > > > 
> > > > > If the kernel got the rest of its system topology from DT, the PCI
> > > > > regions would be described there.
> > > > 
> > > > Yes, if kdump kernel use same DT as 1st kernel.
> > > 
> > > Other than for testing purposes, I don't see why you'd pass the kdump
> > > > kernel a DTB inconsistent with that the 1st kernel was passed (other
> > > > than some properties under /chosen).
> > > 
> > > We added /sys/firmware/fdt specifically to allow the kexec tools to get
> > > the exact DTB the first kernel used. There's no reason for tools to have
> > > to make something up.
> > 
> > Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> > how one will use it unless dropping the option and use /sys/firmware/fdt
> > unconditionally. 
> 
> I think this is a tangential discussion. I think it's fine to say that
> for kdump we do not expect this -- a user would be shooting themselves
> in the foot if they did. Regardless, I was under the impression that
> kdump was usually set up by distribution-provided init code.
> 
> or kdump, which typically is set up automatically by the OS, 

Yes, usually the OS sets up kdump, but with some config file the user can tune
the kexec arguments. Anyway, I agree that one should do the right thing, but if
we are sure the exact fdt from the 1st kernel is needed, we can drop the
kexec-tools --dtb option.

> 
> > If we choose to implement kexec_file_load only in kernel, the interfaces
> > provided are kernel, initrd and cmdline. We can always use same dtb.
> 
> There are use-cases where being in complete control of the purgatory
> code is necessary. For example, the next OS might not be Linux (and
> might not accept a DTB, or have different requirements on the initial
> register state).
> 
> Regardless of the need for something like kexec_file_load for kdump in
> Secure Boot environments, there is also a need for kexec_load with the
> user having complete control.

I'm not sure whether there are such use cases on arm64 in real life.
But indeed, it is a reason for kexec_load to exist if there really are such requests.

> 
> > > > > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > > > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > > > > via UEFI memmap?
> > > > > 
> > > > > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > > > > > acquiring the relevant information and passing that to the first kernel
> > > > > in the DTB (see Documentation/arm/uefi.txt).
> > > > 
> > > > > Ok, thanks for the pointer. So in dt we just have uefi memmap information
> > > > instead of memory nodes details.. 
> > > 
> > > When booted via EFI, yes.
> > > 
> > > For NUMA topology in !ACPI kernels, we might need to also retain and
> > > parse memory nodes, but only for topology information. The kernel would
> > > still only use memory as described by the EFI memory map.
> > > 
> > > There's a horrible edge case I've spotted if performing a chain of
> > > cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> > > respect the EFI memory map so as to avoid corrupting it for the
> > > subsequent LE kernel. Other than this I believe everything should just
> > > work.
> > 
> > > Firmware does not know kernel endianness, kernel should respect firmware
> > > maps and adapt to it, it sounds like a generic issue not specific to kexec.
> 
> I agree that this isn't kexec's fault as such, but in the absence of
> kexec, the above issue does not exist, so one can't consider it in
> isolation.
> 
> > > > > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > > > > UEFI tables in memory are the same, it would be the same as a cold boot.
> > > > 
> > > > For kexec all memory ranges are same, for kdump we need use original reserved
> > > > range with crashkernel= as usable memory and all other original usable ranges
> > > > are not usable anymore. 
> > > 
> > > Sure. This is what I believe we should expose with an additional
> > > property under /chosen, while keeping everything else pristine.
> > > 
> > > The crash kernel can then limit itself to that region, while it would
> > > have the information of the full memory map (which it could log and/or
> > > use to drive other dumping).
> > 
> > In this way kernel should be aware it is a kdump booting, it is doable though
> > I feel it is better for kdump kernel in a black box with infomations it
> > can use just like the 1st kernel. Things here is where we choose to cook
> > the memory infomation in boot loader or in kernel itself.
> 
> Sorry, I can't follow what you are trying to say here. Could you
> elaborate?

Hmm, I mean that if we prepare a kdump-usable UEFI memmap then we do not need to introduce
the extra dtb property, and the kdump kernel just works like a normal boot.
I think we have understood each other in the latter part of this mail :)

One additional issue with the simple way is whether it can be used only in the kdump kernel,
or whether it applies to both normal boot and kdump kernel boot so that it becomes a
general interface instead of one only for kdump.

That means in the 1st kernel we need to override all system RAM sections from UEFI if
the usable chosen property is provided in the 1st kernel.

> 
> > > > Is it possible to modify uefi memmap for kdump case?
> > > 
> > > Technically it would be possible, however I don't think it's necessary,
> > > and I think it would be disadvantageous to do so.
> > > 
> > > Describing the range(s) the crash kernel can use in separate properties
> > > under /chosen has a number of advantages.
> > 
> > Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> > elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> > do not work well in kdump kernel some uncertain reasons. But ideally I
> > think kernel should handle things just like in 1st kernel and avoid to use
> > it. 
> 
> I agree that we should not have kexec/kdump knowledge spread throughout
> the kernel, and that the boot protocol should be uniform with a cold
> boot as far as possible.
> 
> However, requiring userspace or the first kernel to modify
> firmware-provided information has a number of risks and reduces the
> amount of information available to the kdump kernel. To that end I am
> opposed to modifying the memory nodes in the DTB, or to modifying the
> EFI memory map.
> 
> Having a property in the DTB describing the range(s) of memory reserved
> for use by the kdump kernel is vastly simpler, and avoids those risks:
> 
> * It requires a tiny amount of self-contained code in the kdump kernel
>   to parse the property and apply the constraints imposed (i.e. carve up
>   memblock).
> 
>   This is easy to contain in a single function (or at least within a
>   single file), and need not affect drivers or other code.
> 
> * It is uniform regardless of whether the EFI memory map, DT memory
>   nodes, or some other mechanism is used to discover memory in the
>   systems.
> 
>   This makes it easy to impose the restrictions consistently, and is
>   somewhat future-proof.

Ok, considering the arm64-specific complexity of the several combined cases,
especially this one, I would say choosing a simple solution may be the
best choice.

> 
> * Userspace or the first kernel to not need to parse and modify an
>   arbitrary amount of data (which might be in an extended format it
>   doesn't fully understand). There is less risk for this to go wrong.
> 
>   It is far easier to add a property than it is to correctly modify the
>   EFI memory map, memory nodes, or some other data structure. There is
>   less risk, and it is somewhat future-proof.
>   
> * The original memory map information is preserved, even though unused.
>   This may be useful for debugging, and it may turn out that the kdump
>   kernel needs to know about certain portions of the original memory
>   map, even if we are not currently aware of why we would need this.

Thanks
Dave

> 
> Thanks,
> Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21  2:54                         ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-21  2:54 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

On 01/20/16 at 11:28am, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 10:49:46AM +0800, Dave Young wrote:
> > On 01/19/16 at 02:01pm, Mark Rutland wrote:
> > > On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> > > > On 01/19/16 at 12:51pm, Mark Rutland wrote:
> > > > > On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> > > > > > On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> > > > > > > On 01/19/2016 10:43 AM, Dave Young wrote:
> > > > > > > >X86 takes another way in latest kexec-tools and kexec_file_load, that is
> > > > > > > >recreating E820 table and pass it to kexec/kdump kernel, if the entries
> > > > > > > >are over E820 limitation then turn to use setup_data list for remain
> > > > > > > >entries.
> > > > > > > 
> > > > > > > Thanks. I will visit x86 code again.
> > > > > > > 
> > > > > > > >I think it is X86 specific. Personally I think device tree property is
> > > > > > > >better.
> > > > > > > 
> > > > > > > Do you think so?
> > > > > > 
> > > > > > I'm not sure it is the best way. For X86 we run into problem with
> > > > > > memmap= design, one example is pci domain X (X>1) need the pci memory
> > > > > > ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> > > > > > to 2nd kernel we find that cmdline[] array is not big enough.
> > > > > 
> > > > > I'm not sure how PCI ranges relate to the memory map used for normal
> > > > > memory (i.e. RAM), though I'm probably missing some caveat with the way
> > > > > ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> > > > 
> > > > Here is the old patch which was rejected in kexec-tools:
> > > > http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> > > > 
> > > > > 
> > > > > If the kernel got the rest of its system topology from DT, the PCI
> > > > > regions would be described there.
> > > > 
> > > > Yes, if kdump kernel use same DT as 1st kernel.
> > > 
> > > Other than for testing purposes, I don't see why you'd pass the kdump
> > > kernel a DTB inconsistent with that the 1st kernel was passsed (other
> > > than some proerties under /chosen).
> > > 
> > > We added /sys/firmware/fdt specifically to allow the kexec tools to get
> > > the exact DTB the first kernel used. There's no reason for tools to have
> > > to make something up.
> > 
> > Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> > how one will use it unless dropping the option and use /sys/firmware/fdt
> > unconditionally. 
> 
> I think this is a tangential discussion. I think it's fine to say that
> for kdump we do not expect this -- a user would be shooting themselves
> in the foot if they did. Regardless, I was under the impression that
> kdump was usually set up by distribution-provided init code.
> 
> or kdump, which typically is set up automatically by the OS, 

Yes, usually the OS sets up kdump, but with some config file the user can tune the kexec
arguments. Anyway, I agree that one should do the right thing, but if we are sure the exact
fdt from the 1st kernel is needed we can drop the kexec-tools --dtb option.

> 
> > If we choose to implement kexec_file_load only in kernel, the interfaces
> > provided are kernel, initrd and cmdline. We can always use same dtb.
> 
> There are use-cases where being in complete control of the purgatory
> code is necessary. For example, the next OS might not be Linux (and
> might not accept a DTB, or have different requirements on the initial
> register state).
> 
> Regardless of the need for something like kexec_file_load for kdump in
> Secure Boot environments, there is also a need for kexec_load with the
> user having complete control.

I'm not sure if there are such use cases in arm64 in real life.
But indeed it is a reason kexec_load can exist if there really are such requests. 

> 
> > > > > > Do you think for arm64 only usable memory is necessary to let kdump kernel
> > > > > > know? I'm curious about how arm64 kernel get all memory layout from boot loader,
> > > > > > via UEFI memmap?
> > > > > 
> > > > > When booted via EFI, we use the EFI memory map. The EFI stub handles
> > > > > acquring the relevant information and passing that to the first kernel
> > > > > in the DTB (see Documentation/arm/uefi.txt).
> > > > 
> > > > Ok, thanks for the pointer. So in dt we are just have uefi memmap infomation
> > > > instead of memory nodes details.. 
> > > 
> > > When booted via EFI, yes.
> > > 
> > > For NUMA topology in !ACPI kernels, we might need to also retain and
> > > parse memory nodes, but only for toplogy information. The kernel would
> > > still only use memory as described by the EFI memory map.
> > > 
> > > There's a horrible edge case I've spotted if performing a chain of
> > > cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> > > respect the EFI memory map so as to avoid corrupting it for the
> > > subsequent LE kernel. Other than this I believe everything should just
> > > work.
> > 
> > Firmware do not know kernel endianniess, kernel should respect firmware
> > maps and adapt to it, it sounds like a generic issue not specfic to kexec.
> 
> I agree that this isn't kexec's fault as such, but in the absence of
> kexec, the above issue does not exist, so one can't consider it in
> isolation.
> 
> > > > > A kexec'd kernel should simply inherit that. So long as the DTB and/or
> > > > > UEFI tables in memory are the same, it would be the same as a cold boot.
> > > > 
> > > > For kexec all memory ranges are same, for kdump we need use original reserved
> > > > range with crashkernel= as usable memory and all other orignal usable ranges
> > > > are not usable anymore. 
> > > 
> > > Sure. This is what I believe we should expose with an additional
> > > property under /chosen, while keeping everything else pristine.
> > > 
> > > The crash kernel can then limit itself to that region, while it would
> > > have the information of the full memory map (which it could log and/or
> > > use to drive other dumping).
> > 
> > In this way kernel should be aware it is a kdump booting, it is doable though
> > I feel it is better for kdump kernel in a black box with infomations it
> > can use just like the 1st kernel. Things here is where we choose to cook
> > the memory infomation in boot loader or in kernel itself.
> 
> Sorry, I can't follow what you are trying to say here. Could you
> elaborate?

Hmm, I mean that if we prepare a kdump-usable UEFI memmap then we do not need to introduce
the extra dtb property, and the kdump kernel just works like a normal boot.
I think we have understood each other in the latter part of this mail :)

One additional issue with the simple way is whether it can be used only in the kdump kernel,
or whether it applies to both normal boot and kdump kernel boot so that it becomes a
general interface instead of one only for kdump.

That means in the 1st kernel we need to override all system RAM sections from UEFI if
the usable chosen property is provided in the 1st kernel.

> 
> > > > Is it possible to modify uefi memmap for kdump case?
> > > 
> > > Technically it would be possible, however I don't think it's necessary,
> > > and I think it would be disadvantageous to do so.
> > > 
> > > Describing the range(s) the crash kernel can use in separate properties
> > > under /chosen has a number of advantages.
> > 
> > Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> > elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> > do not work well in kdump kernel some uncertain reasons. But ideally I
> > think kernel should handle things just like in 1st kernel and avoid to use
> > it. 
> 
> I agree that we should not have kexec/kdump knowledge spread throughout
> the kernel, and that the boot protocol should be uniform with a cold
> boot as far as possible.
> 
> However, requiring userspace or the first kernel to modify
> firmware-provided information has a number of risks and reduces the
> amount of information available to the kdump kernel. To that end I am
> opposed to modifying the memory nodes in the DTB, or to modifying the
> EFI memory map.
> 
> Having a property in the DTB describing the range(s) of memory reserved
> for use by the kdump kernel is vastly simpler, and avoids those risks:
> 
> * It requires a tiny amount of self-contained code in the kdump kernel
>   to parse the property and apply the constraints imposed (i.e. carve up
>   memblock).
> 
>   This is easy to contain in a single function (or at least within a
>   single file), and need not affect drivers or other code.
> 
> * It is uniform regardless of whether the EFI memory map, DT memory
>   nodes, or some other mechanism is used to discover memory in the
>   systems.
> 
>   This makes it easy to impose the restrictions consistently, and is
>   somewhat future-proof.

Ok, considering the arm64-specific complexity of the several combined cases,
especially this one, I would say choosing a simple solution may be the
best choice.

> 
> * Userspace or the first kernel to not need to parse and modify an
>   arbitrary amount of data (which might be in an extended format it
>   doesn't fully understand). There is less risk for this to go wrong.
> 
>   It is far easier to add a property than it is to correctly modify the
>   EFI memory map, memory nodes, or some other data structure. There is
>   less risk, and it is somewhat future-proof.
>   
> * The original memory map information is preserved, even though unused.
>   This may be useful for debugging, and it may turn out that the kdump
>   kernel needs to know about certain portions of the original memory
>   map, even if we are not currently aware of why we would need this.

Thanks
Dave

> 
> Thanks,
> Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 11:54                           ` Mark Rutland
@ 2016-01-21  2:57                             ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-21  2:57 UTC (permalink / raw)
  To: linux-arm-kernel

Hi, Mark

On 01/20/16 at 11:54am, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 02:38:56PM +0800, Dave Young wrote:
> > Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> > or uefi-memmap so that we do not need any extra kernel cmdline.
> 
> I am strongly opposed to modifying the FW-provided memroy map
> information, for the reasons I expressed in other replies.
> 
> What are your concerns with a property under /chosen?
> 
> > For x86 we would like to drop the memmap= usage in kexec-tools but we can not
> > do that for a compatibility problem about calgary iommu. So that currently
> > kexec-tools supports both recreating E820 maps and passing memmap=.
> > 
> > We should think it carefully because it will be hard to remove once we support it.
> 
> I agree that we don't want a plethora of solutions that we have to
> support forever.
> 
> > IMO handling it in code is better than using an external interface.
> 
> I'm not sure what you mean by this. What is the "external interface",
> and which code do you beleive it is better to handle this in?

I think we have covered all these in another reply. Let's continue
the discussion if needed in that thread.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21  2:57                             ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-21  2:57 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

Hi, Mark

On 01/20/16 at 11:54am, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 02:38:56PM +0800, Dave Young wrote:
> > Maybe I did not say it clearly, I prefer kexec syscall/tool to modifiy dtb
> > or uefi-memmap so that we do not need any extra kernel cmdline.
> 
> I am strongly opposed to modifying the FW-provided memroy map
> information, for the reasons I expressed in other replies.
> 
> What are your concerns with a property under /chosen?
> 
> > For x86 we would like to drop the memmap= usage in kexec-tools but we can not
> > do that for a compatibility problem about calgary iommu. So that currently
> > kexec-tools supports both recreating E820 maps and passing memmap=.
> > 
> > We should think it carefully because it will be hard to remove once we support it.
> 
> I agree that we don't want a plethora of solutions that we have to
> support forever.
> 
> > IMO handling it in code is better than using an external interface.
> 
> I'm not sure what you mean by this. What is the "external interface",
> and which code do you beleive it is better to handle this in?

I think we have covered all these in another reply. Let's continue
the discussion if needed in that thread.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 11:54                           ` Mark Rutland
@ 2016-01-21  3:03                             ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-21  3:03 UTC (permalink / raw)
  To: linux-arm-kernel

> I am strongly opposed to modifying the FW-provided memroy map
> information, for the reasons I expressed in other replies.
> 
> What are your concerns with a property under /chosen?

If we choose a simpler way between memmap= and /chosen property then
I think a property in /chosen looks better to me.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21  3:03                             ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-21  3:03 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, James Morse, christoffer.dall, marc.zyngier,
	kexec, linux-arm-kernel

> I am strongly opposed to modifying the FW-provided memroy map
> information, for the reasons I expressed in other replies.
> 
> What are your concerns with a property under /chosen?

If we choose a simpler way between memmap= and /chosen property then
I think a property in /chosen looks better to me.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 14:59                           ` Ard Biesheuvel
@ 2016-01-21  5:43                             ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-21  5:43 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/2016 11:59 PM, Ard Biesheuvel wrote:
> On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
>> Ard, Ganapatrao, the below is something we need to consider for the
>> combination of the NUMA & kexec approaches. It only becomes a problem
>> if/when we preserve DT memory nodes in the presence of EFI, though it
>> would be nice to not box ourselves into a corner.
>>
>> On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
>>> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
>>>> On 01/19/2016 11:01 PM, Mark Rutland wrote:
>>>>> For NUMA topology in !ACPI kernels, we might need to also retain and
>>>>> parse memory nodes, but only for toplogy information. The kernel would
>>>>> still only use memory as described by the EFI memory map.
>>>>>
>>>>> There's a horrible edge case I've spotted if performing a chain of
>>>>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>>>>> respect the EFI memory map so as to avoid corrupting it for the
>>>>> subsequent LE kernel. Other than this I believe everything should just
>>>>> work.
>>>>
>>>> BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
>>>> for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
>>>> (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
>>>
>>> As I mentioned above, the problem exists when memory nodes also exist
>>> (for describing NUMA topology). In that case the BE kernel would try to
>>> use the information from the memory nodes.
>>>
>>>> For BE -> LE, BE kernel doesn't know wther UEFI memmap table is available or not
>>>> and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
>>>
>>> See above. The problem I imagine is:
>>>
>>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>>
>>>      v       kexec
>>>
>>> BE kernel - uses DT memory nodes
>>>            - clobbers EFI runtime regions as it sees them as available
>>>
>>>      v       kexec
>>>
>>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>>            - tries to call EFI runtime services, and explodes.
>>
>> I'm not really sure what the best approach is here, but I thought that
>> it would be good to raise awareness of the edge-case.
>>
>
> I think we should simply allow the BE kernel to deal with a UEFI
> memory map. It only involves a bit of byte swapping (which I already
> implemented at some point)

Just out of curiosity,
will runtime services also be available on a BE kernel with LE UEFI?

-Takahiro AKASHI

> It would require some minor refactoring to make the UEFI init code
> separate from all the other bits, but I don't see any major issues
> here
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21  5:43                             ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-21  5:43 UTC (permalink / raw)
  To: Ard Biesheuvel, Mark Rutland
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, Marc Zyngier,
	James Morse, linux-arm-kernel, Ganapatrao Kulkarni, Dave Young,
	kexec, Christoffer Dall

On 01/20/2016 11:59 PM, Ard Biesheuvel wrote:
> On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
>> Ard, Ganapatrao, the below is something we need to consider for the
>> combination of the NUMA & kexec approaches. It only becomes a problem
>> if/when we preserve DT memory nodes in the presence of EFI, though it
>> would be nice to not box ourselves into a corner.
>>
>> On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
>>> On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
>>>> On 01/19/2016 11:01 PM, Mark Rutland wrote:
>>>>> For NUMA topology in !ACPI kernels, we might need to also retain and
>>>>> parse memory nodes, but only for toplogy information. The kernel would
>>>>> still only use memory as described by the EFI memory map.
>>>>>
>>>>> There's a horrible edge case I've spotted if performing a chain of
>>>>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>>>>> respect the EFI memory map so as to avoid corrupting it for the
>>>>> subsequent LE kernel. Other than this I believe everything should just
>>>>> work.
>>>>
>>>> BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
>>>> for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
>>>> (as in the case of LE -> LE) and require users to provide a dtb file explicitly.
>>>
>>> As I mentioned above, the problem exists when memory nodes also exist
>>> (for describing NUMA topology). In that case the BE kernel would try to
>>> use the information from the memory nodes.
>>>
>>>> For BE -> LE, BE kernel doesn't know wther UEFI memmap table is available or not
>>>> and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
>>>
>>> See above. The problem I imagine is:
>>>
>>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>>
>>>      v       kexec
>>>
>>> BE kernel - uses DT memory nodes
>>>            - clobbers EFI runtime regions as it sees them as available
>>>
>>>      v       kexec
>>>
>>> LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
>>>            - tries to call EFI runtime services, and explodes.
>>
>> I'm not really sure what the best approach is here, but I thought that
>> it would be good to raise awareness of the edge-case.
>>
>
> I think we should simply allow the BE kernel to deal with a UEFI
> memory map. It only involves a bit of byte swapping (which I already
> implemented at some point)

Just out of curiosity,
will runtime services also be available on a BE kernel with LE UEFI?

-Takahiro AKASHI

> It would require some minor refactoring to make the UEFI init code
> separate from all the other bits, but I don't see any major issues
> here
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-20 11:49                         ` Mark Rutland
@ 2016-01-21  6:53                           ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-21  6:53 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/20/2016 08:49 PM, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
>> On 01/20/2016 11:49 AM, Dave Young wrote:
>>> On 01/19/16 at 02:01pm, Mark Rutland wrote:
>>>> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>>>>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>>>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>>>>> entries.
>>>>>>>>
>>>>>>>> Thanks. I will visit x86 code again.
>>>>>>>>
>>>>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>>>>> better.
>>>>>>>>
>>>>>>>> Do you think so?
>>>>>>>
>>>>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>>>>
>>>>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>>>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>>>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>>>>
>>>>> Here is the old patch which was rejected in kexec-tools:
>>>>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>>>>
>>>>>>
>>>>>> If the kernel got the rest of its system topology from DT, the PCI
>>>>>> regions would be described there.
>>>>>
>>>>> Yes, if kdump kernel use same DT as 1st kernel.
>>>>
>>>> Other than for testing purposes, I don't see why you'd pass the kdump
>>>> kernel a DTB inconsistent with that the 1st kernel was passsed (other
>>>> than some proerties under /chosen).
>>>>
>>>> We added /sys/firmware/fdt specifically to allow the kexec tools to get
>>>> the exact DTB the first kernel used. There's no reason for tools to have
>>>> to make something up.
>>>
>>> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
>>> how one will use it unless dropping the option and use /sys/firmware/fdt
>>> unconditionally.
>>
>> As a matter of fact, specifying proper command line parameters as well as
>> dtb is partly users' responsibility for kdump to work correctly.
>> (especially for BE kernel)
>>
>>> If we choose to implement kexec_file_load only in kernel, the interfaces
>>> provided are kernel, initrd and cmdline. We can always use same dtb.
>>
>> I would say that we can always use the same dtb even with kexec_load
>> from user's perspective. Right?
>
> No.
>
> This breaks using kexec for boot-loader purposes, and imposes a policy.

What kind of policy?
I said "can", but if we want to use other settings/configurations, we can
still have full control.

> For better or worse kexec_file_load has always imposed a constrained
> Linux-only policy, so that's a different story.
>
>>>> There's a horrible edge case I've spotted if performing a chain of
>>>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>>>> respect the EFI memory map so as to avoid corrupting it for the
>>>> subsequent LE kernel. Other than this I believe everything should just
>>>> work.
>>>
>>> Firmware do not know kernel endianniess, kernel should respect firmware
>>> maps and adapt to it, it sounds like a generic issue not specfic to kexec.
>>
>> On arm64, a kernel image header has a bit field to specify the image's endianness.
>> Anyway, our current implementation replies on a user-supplied dtb to start BE kernel.
>
> The firmware should _never_ care about the kernel's endianness. The
> bootlaoder or first kernel shouldn't care about the next kernel's
> endianness apart from in exceptional circumstances. The DTB for a LE
> kernel should look identical to that passed to a BE kernel.

Please note that I didn't say anything different from your last two statements.
The current arm64 kexec implementation doesn't do anything specific to BE,
but as long as the BE kernel doesn't support UEFI, users are responsible for
providing a proper dtb.

> In my mind, the only valid reason to look at that bit is so that
> bootloaders can provide a warning if the CPU does not implement that
> endianness.
>
> The issue I mention above should be solved by changes to the BE kernel.
>
>>>>> Is it possible to modify uefi memmap for kdump case?
>>>>
>>>> Technically it would be possible, however I don't think it's necessary,
>>>> and I think it would be disadvantageous to do so.
>>>>
>>>> Describing the range(s) the crash kernel can use in separate properties
>>>> under /chosen has a number of advantages.
>>>
>>> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
>>> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
>>> do not work well in kdump kernel some uncertain reasons. But ideally I
>>> think kernel should handle things just like in 1st kernel and avoid to use
>>> it.
>>
>> So I'm not still sure about what are advantages of a property under /chosen
>> over "memmap=" kernel parameter.
>> Both are simple and can have the same effect with minimizing changes to dtb.
>> (But if, in the latter case, we have to provide *all* the memory-related information
>> through "memmap=" parameters, it would be much complicated.)
>
> The reason I prefer a property over command line additions include:

Take some examples:
(a) a property under /chosen
   {
     chosen = {
       cmdline = "elfcorehdr=AA at BB maxcpus=1 ...";
     }
     usable-memory = <XX YY>;
     memory {
       ...
     }
   }

(b) a kernel command line parameter
   (I use the same name, "usable-memory", to show the similarity; another name could be used, though.)
   {
     chosen = {
       cmdline = "elfcorehdr=AA at BB maxcpus=1 usable-memory=YY at XX ...";
     }
     memory {
       ...
     }
   }

> * It keeps the command line simple (as you mention the opposite is
>    "complicated").

I think both are simple.

> * It is logically separate from options the user may pass to the kernel
>    in that the restricted region(s) of memory avaialble are effectively
>    properties of the system (in that the crashed OS is part of the system
>    state).

"elfcorehdr=" parameter already breaks your point.
"elfcorehdr=" looks to be, what you say, a system property, and is actually
added by kexec-tools on all architectures, and "usable-memory", whether it is
a DT property or a kernel parameter, will also be added by kexec-tools.
(Users don't have to care.)

> * The semantics of the command line parsing can change subtly over time
>    (for example, see 51e158c12aca3c9a, which terminates command line
>    parseing at "--"). Maknig sure that a command line option will
>    actually be parsed by the next kernel is not trivial.
>
>    Keeping this information isolated from the command line is more
>    robust.

Even so, who wants to use kdump without testing?
And this is not a kdump-specific issue.

> * Addition of a property is a self-contained operation, that doesn't
>    require any knowledge about the command line.

I don't get your point here.
For a kernel parameter, early_param() can encapsulate all the necessary stuff.
Once the kernel recognizes a usable memory region, limiting available
memory should be done in the exact same way.

Thanks,
-Takahiro AKASHI


> Thanks,
> Mark.
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21  6:53                           ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-21  6:53 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

On 01/20/2016 08:49 PM, Mark Rutland wrote:
> On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
>> On 01/20/2016 11:49 AM, Dave Young wrote:
>>> On 01/19/16 at 02:01pm, Mark Rutland wrote:
>>>> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>>>>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>>>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>>>>> entries.
>>>>>>>>
>>>>>>>> Thanks. I will visit x86 code again.
>>>>>>>>
>>>>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>>>>> better.
>>>>>>>>
>>>>>>>> Do you think so?
>>>>>>>
>>>>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>>>>
>>>>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>>>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>>>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>>>>
>>>>> Here is the old patch which was rejected in kexec-tools:
>>>>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>>>>
>>>>>>
>>>>>> If the kernel got the rest of its system topology from DT, the PCI
>>>>>> regions would be described there.
>>>>>
>>>>> Yes, if kdump kernel use same DT as 1st kernel.
>>>>
>>>> Other than for testing purposes, I don't see why you'd pass the kdump
>>>> kernel a DTB inconsistent with that the 1st kernel was passed (other
>>>> than some properties under /chosen).
>>>>
>>>> We added /sys/firmware/fdt specifically to allow the kexec tools to get
>>>> the exact DTB the first kernel used. There's no reason for tools to have
>>>> to make something up.
>>>
>>> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
>>> how one will use it unless dropping the option and use /sys/firmware/fdt
>>> unconditionally.
>>
>> As a matter of fact, specifying proper command line parameters as well as
>> dtb is partly users' responsibility for kdump to work correctly.
>> (especially for BE kernel)
>>
>>> If we choose to implement kexec_file_load only in kernel, the interfaces
>>> provided are kernel, initrd and cmdline. We can always use same dtb.
>>
>> I would say that we can always use the same dtb even with kexec_load
>> from user's perspective. Right?
>
> No.
>
> This breaks using kexec for boot-loader purposes, and imposes a policy.

What kind of policy?
I said "can", but if we want to use other setting/configuration, we can
still have a full control.

> For better or worse kexec_file_load has always imposed a constrained
> Linux-only policy, so that's a different story.
>
>>>> There's a horrible edge case I've spotted if performing a chain of
>>>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>>>> respect the EFI memory map so as to avoid corrupting it for the
>>>> subsequent LE kernel. Other than this I believe everything should just
>>>> work.
>>>
>>> Firmware do not know kernel endianness, kernel should respect firmware
>>> maps and adapt to it, it sounds like a generic issue not specific to kexec.
>>
>> On arm64, a kernel image header has a bit field to specify the image's endianness.
>> Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
>
> The firmware should _never_ care about the kernel's endianness. The
> bootloader or first kernel shouldn't care about the next kernel's
> endianness apart from in exceptional circumstances. The DTB for a LE
> kernel should look identical to that passed to a BE kernel.

Please note that I didn't say anything different from your last two statements.
The current arm64 kexec implementation doesn't do anything specific to BE,
but as far as BE kernel doesn't support UEFI, users are responsible for
providing a proper dtb.

> In my mind, the only valid reason to look at that bit is so that
> bootloaders can provide a warning if the CPU does not implement that
> endianness.
>
> The issue I mention above should be solved by changes to the BE kernel.
>
>>>>> Is it possible to modify uefi memmap for kdump case?
>>>>
>>>> Technically it would be possible, however I don't think it's necessary,
>>>> and I think it would be disadvantageous to do so.
>>>>
>>>> Describing the range(s) the crash kernel can use in separate properties
>>>> under /chosen has a number of advantages.
>>>
>>> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
>>> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
>>> do not work well in kdump kernel some uncertain reasons. But ideally I
>>> think kernel should handle things just like in 1st kernel and avoid to use
>>> it.
>>
>> So I'm not still sure about what are advantages of a property under /chosen
>> over "memmap=" kernel parameter.
>> Both are simple and can have the same effect with minimizing changes to dtb.
>> (But if, in the latter case, we have to provide *all* the memory-related information
>> through "memmap=" parameters, it would be much complicated.)
>
> The reason I prefer a property over command line additions include:

Take some examples:
(a) a property under /chosen
   {
     chosen = {
       cmdline = "elfcorehdr=AA@BB maxcpus=1 ...";
     }
     usable-memory = <XX YY>;
     memory {
       ...
     }
   }

(b) a kernel command line parameter
   (I use the same name, "usable-memory", to show the similarity. may use another name though.)
   {
     chosen = {
       cmdline = "elfcorehdr=AA@BB maxcpus=1 usable-memory=YY@XX ...";
     }
     memory {
       ...
     }
   }

> * It keeps the command line simple (as you mention the opposite is
>    "complicated").

I think both are simple.

> * It is logically separate from options the user may pass to the kernel
>    in that the restricted region(s) of memory available are effectively
>    properties of the system (in that the crashed OS is part of the system
>    state).

"elfcorehdr=" parameter already breaks your point.
"elfcorehdr=" looks to be, what you say, a system property, and is actually
added by kexec-tools on all architectures, and "usable-memory", whether it is
a DT property or a kernel parameter, will also be added by kexec-tools.
(Users don't have to care.)

> * The semantics of the command line parsing can change subtly over time
>    (for example, see 51e158c12aca3c9a, which terminates command line
>    parsing at "--"). Making sure that a command line option will
>    actually be parsed by the next kernel is not trivial.
>
>    Keeping this information isolated from the command line is more
>    robust.

Even so, who wants to use kdump without testing?
and this is not a kdump specific issue.

> * Addition of a property is a self-contained operation, that doesn't
>    require any knowledge about the command line.

I don't get your point here.
For a kernel parameter, early_param() can encapsulate all the stuffs necessary.
Once the kernel recognizes a usable memory region, limiting available
memory should be done in the exact same way.

Thanks,
-Takahiro AKASHI


> Thanks,
> Mark.
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-21  6:53                           ` AKASHI Takahiro
@ 2016-01-21 12:02                             ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-21 12:02 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
> On 01/20/2016 08:49 PM, Mark Rutland wrote:
> >On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
> >>On 01/20/2016 11:49 AM, Dave Young wrote:
> >>>On 01/19/16 at 02:01pm, Mark Rutland wrote:
> >>>>On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> >>>>>On 01/19/16 at 12:51pm, Mark Rutland wrote:
> >>>>>>On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> >>>>>>>On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> >>>>>>>>On 01/19/2016 10:43 AM, Dave Young wrote:
> >>>>>>>>>X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >>>>>>>>>recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >>>>>>>>>are over E820 limitation then turn to use setup_data list for remain
> >>>>>>>>>entries.
> >>>>>>>>
> >>>>>>>>Thanks. I will visit x86 code again.
> >>>>>>>>
> >>>>>>>>>I think it is X86 specific. Personally I think device tree property is
> >>>>>>>>>better.
> >>>>>>>>
> >>>>>>>>Do you think so?
> >>>>>>>
> >>>>>>>I'm not sure it is the best way. For X86 we run into problem with
> >>>>>>>memmap= design, one example is pci domain X (X>1) need the pci memory
> >>>>>>>ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> >>>>>>>to 2nd kernel we find that cmdline[] array is not big enough.
> >>>>>>
> >>>>>>I'm not sure how PCI ranges relate to the memory map used for normal
> >>>>>>memory (i.e. RAM), though I'm probably missing some caveat with the way
> >>>>>>ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> >>>>>
> >>>>>Here is the old patch which was rejected in kexec-tools:
> >>>>>http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> >>>>>
> >>>>>>
> >>>>>>If the kernel got the rest of its system topology from DT, the PCI
> >>>>>>regions would be described there.
> >>>>>
> >>>>>Yes, if kdump kernel use same DT as 1st kernel.
> >>>>
> >>>>Other than for testing purposes, I don't see why you'd pass the kdump
> >>>>kernel a DTB inconsistent with that the 1st kernel was passed (other
> >>>>than some properties under /chosen).
> >>>>
> >>>>We added /sys/firmware/fdt specifically to allow the kexec tools to get
> >>>>the exact DTB the first kernel used. There's no reason for tools to have
> >>>>to make something up.
> >>>
> >>>Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> >>>how one will use it unless dropping the option and use /sys/firmware/fdt
> >>>unconditionally.
> >>
> >>As a matter of fact, specifying proper command line parameters as well as
> >>dtb is partly users' responsibility for kdump to work correctly.
> >>(especially for BE kernel)
> >>
> >>>If we choose to implement kexec_file_load only in kernel, the interfaces
> >>>provided are kernel, initrd and cmdline. We can always use same dtb.
> >>
> >>I would say that we can always use the same dtb even with kexec_load
> >>from user's perspective. Right?
> >
> >No.
> >
> >This breaks using kexec for boot-loader purposes, and imposes a policy.
> 
> What kind of policy?
> I said "can", but if we want to use other setting/configuration, we can
> still have a full control.

Apologies, I misunderstood.

In most cases, using the existing DTB (with minor modifications to
/chosen for bootargs and such) is fine. If the user just wants to boot
another Linux kernel, that's generally what they should do.

I think we're agreed on that.

However, there are cases when the user may want to use a different DTB,
or use a different purgatory. So we cannot mandate that the existing DTB
is reused, nor that an in-kernel purgatory must be used, as that imposes
a policy.

> >For better or worse kexec_file_load has always imposed a constrained
> >Linux-only policy, so that's a different story.
> >
> >>>>There's a horrible edge case I've spotted if performing a chain of
> >>>>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>>>respect the EFI memory map so as to avoid corrupting it for the
> >>>>subsequent LE kernel. Other than this I believe everything should just
> >>>>work.
> >>>
> >>>Firmware do not know kernel endianness, kernel should respect firmware
> >>>maps and adapt to it, it sounds like a generic issue not specific to kexec.
> >>
> >>On arm64, a kernel image header has a bit field to specify the image's endianness.
> >>Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
> >
> >The firmware should _never_ care about the kernel's endianness. The
> >bootloader or first kernel shouldn't care about the next kernel's
> >endianness apart from in exceptional circumstances. The DTB for a LE
> >kernel should look identical to that passed to a BE kernel.
> 
> Please note that I didn't say anything different from your last two statements.
> The current arm64 kexec implementation doesn't do anything specific to BE,
> but as far as BE kernel doesn't support UEFI, users are responsible for
> providing a proper dtb.

I'm just confused as to what you mean by a "proper dtb" in that case.

If you just mean one with memory nodes hacked in, then that would
currently be a way to make that work, yes.

It seems like the better option is to fix the BE kernel to support a
UEFI memory map, as that solves other issues.

> 
> >In my mind, the only valid reason to look at that bit is so that
> >bootloaders can provide a warning if the CPU does not implement that
> >endianness.
> >
> >The issue I mention above should be solved by changes to the BE kernel.
> >
> >>>>>Is it possible to modify uefi memmap for kdump case?
> >>>>
> >>>>Technically it would be possible, however I don't think it's necessary,
> >>>>and I think it would be disadvantageous to do so.
> >>>>
> >>>>Describing the range(s) the crash kernel can use in separate properties
> >>>>under /chosen has a number of advantages.
> >>>
> >>>Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> >>>elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> >>>do not work well in kdump kernel some uncertain reasons. But ideally I
> >>>think kernel should handle things just like in 1st kernel and avoid to use
> >>>it.
> >>
> >>So I'm not still sure about what are advantages of a property under /chosen
> >>over "memmap=" kernel parameter.
> >>Both are simple and can have the same effect with minimizing changes to dtb.
> >>(But if, in the latter case, we have to provide *all* the memory-related information
> >>through "memmap=" parameters, it would be much complicated.)
> >
> >The reason I prefer a property over command line additions include:
> 
> Take some examples:
> (a) a property under /chosen
>   {
>     chosen = {
>       cmdline = "elfcorehdr=AA@BB maxcpus=1 ...";
>     }
>     usable-memory = <XX YY>;
>     memory {
>       ...
>     }
>   }
> 
> (b) a kernel command line parameter
>   (I use the same name, "usable-memory", to show the similarity. may use another name though.)
>   {
>     chosen = {
>       cmdline = "elfcorehdr=AA@BB maxcpus=1 usable-memory=YY@XX ...";
>     }
>     memory {
>       ...
>     }
>   }
> 
> >* It keeps the command line simple (as you mention the opposite is
> >   "complicated").
> 
> I think both are simple.
> 
> >* It is logically separate from options the user may pass to the kernel
> >   in that the restricted region(s) of memory available are effectively
> >   properties of the system (in that the crashed OS is part of the system
> >   state).
> 
> "elfcorehdr=" parameter already breaks your point.
> "elfcorehdr=" looks to be, what you say, a system property, and is actually
> added by kexec-tools on all architectures, and "usable-memory", whether it is
> a DT property or a kernel parameter, will also be added by kexec-tools.
> (Users don't have to care.)

Just because architectures do one thing today does not mean that we have
to follow it.

I don't think that breaks my point so much as shows that a different
approach is taken by others today.

There's also no reason this cannot be a property under /chosen.

> >* The semantics of the command line parsing can change subtly over time
> >   (for example, see 51e158c12aca3c9a, which terminates command line
> >   parsing at "--"). Making sure that a command line option will
> >   actually be parsed by the next kernel is not trivial.
> >
> >   Keeping this information isolated from the command line is more
> >   robust.
> 
> Even so, who wants to use kdump without testing?
> and this is not a kdump specific issue.
> 
> >* Addition of a property is a self-contained operation, that doesn't
> >   require any knowledge about the command line.
> 
> I don't get your point here.
> For a kernel parameter, early_param() can encapsulate all the stuffs necessary.
> Once the kernel recognizes a usable memory region, limiting available
> memory should be done in the exact same way.

I mean when modifying the command line.

To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
line, one needs to know where it is valid to place it. Appending doesn't
always work, as per the example above with 51e158c12aca3c9a.

For both of these my point was that generally there is some fragility in
this area. While it's easy to say that breaking this would be someone
else's fault, we can easily avoid the possibility of that happening, and
avoid a set of problems trying to maintain backwards compatibility if
there were a sensible change that happened to break things.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21 12:02                             ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-21 12:02 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
> On 01/20/2016 08:49 PM, Mark Rutland wrote:
> >On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
> >>On 01/20/2016 11:49 AM, Dave Young wrote:
> >>>On 01/19/16 at 02:01pm, Mark Rutland wrote:
> >>>>On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
> >>>>>On 01/19/16 at 12:51pm, Mark Rutland wrote:
> >>>>>>On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
> >>>>>>>On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
> >>>>>>>>On 01/19/2016 10:43 AM, Dave Young wrote:
> >>>>>>>>>X86 takes another way in latest kexec-tools and kexec_file_load, that is
> >>>>>>>>>recreating E820 table and pass it to kexec/kdump kernel, if the entries
> >>>>>>>>>are over E820 limitation then turn to use setup_data list for remain
> >>>>>>>>>entries.
> >>>>>>>>
> >>>>>>>>Thanks. I will visit x86 code again.
> >>>>>>>>
> >>>>>>>>>I think it is X86 specific. Personally I think device tree property is
> >>>>>>>>>better.
> >>>>>>>>
> >>>>>>>>Do you think so?
> >>>>>>>
> >>>>>>>I'm not sure it is the best way. For X86 we run into problem with
> >>>>>>>memmap= design, one example is pci domain X (X>1) need the pci memory
> >>>>>>>ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
> >>>>>>>to 2nd kernel we find that cmdline[] array is not big enough.
> >>>>>>
> >>>>>>I'm not sure how PCI ranges relate to the memory map used for normal
> >>>>>>memory (i.e. RAM), though I'm probably missing some caveat with the way
> >>>>>>ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
> >>>>>
> >>>>>Here is the old patch which was rejected in kexec-tools:
> >>>>>http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
> >>>>>
> >>>>>>
> >>>>>>If the kernel got the rest of its system topology from DT, the PCI
> >>>>>>regions would be described there.
> >>>>>
> >>>>>Yes, if kdump kernel use same DT as 1st kernel.
> >>>>
> >>>>Other than for testing purposes, I don't see why you'd pass the kdump
> >>>>kernel a DTB inconsistent with that the 1st kernel was passed (other
> >>>>than some properties under /chosen).
> >>>>
> >>>>We added /sys/firmware/fdt specifically to allow the kexec tools to get
> >>>>the exact DTB the first kernel used. There's no reason for tools to have
> >>>>to make something up.
> >>>
> >>>Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
> >>>how one will use it unless dropping the option and use /sys/firmware/fdt
> >>>unconditionally.
> >>
> >>As a matter of fact, specifying proper command line parameters as well as
> >>dtb is partly users' responsibility for kdump to work correctly.
> >>(especially for BE kernel)
> >>
> >>>If we choose to implement kexec_file_load only in kernel, the interfaces
> >>>provided are kernel, initrd and cmdline. We can always use same dtb.
> >>
> >>I would say that we can always use the same dtb even with kexec_load
> >>from user's perspective. Right?
> >
> >No.
> >
> >This breaks using kexec for boot-loader purposes, and imposes a policy.
> 
> What kind of policy?
> I said "can", but if we want to use other setting/configuration, we can
> still have a full control.

Apologies, I misunderstood.

In most cases, using the existing DTB (with minor modifications to
/chosen for bootargs and such) is fine. If the user just wants to boot
another Linux kernel, that's generally what they should do.

I think we're agreed on that.

However, there are cases when the user may want to use a different DTB,
or use a different purgatory. So we cannot mandate that the existing DTB
is reused, nor that an in-kernel purgatory must be used, as that imposes
a policy.

> >For better or worse kexec_file_load has always imposed a constrained
> >Linux-only policy, so that's a different story.
> >
> >>>>There's a horrible edge case I've spotted if performing a chain of
> >>>>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>>>respect the EFI memory map so as to avoid corrupting it for the
> >>>>subsequent LE kernel. Other than this I believe everything should just
> >>>>work.
> >>>
> >>>Firmware do not know kernel endianness, kernel should respect firmware
> >>>maps and adapt to it, it sounds like a generic issue not specific to kexec.
> >>
> >>On arm64, a kernel image header has a bit field to specify the image's endianness.
> >>Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
> >
> >The firmware should _never_ care about the kernel's endianness. The
> >bootloader or first kernel shouldn't care about the next kernel's
> >endianness apart from in exceptional circumstances. The DTB for a LE
> >kernel should look identical to that passed to a BE kernel.
> 
> Please note that I didn't say anything different from your last two statements.
> The current arm64 kexec implementation doesn't do anything specific to BE,
> but as far as BE kernel doesn't support UEFI, users are responsible for
> providing a proper dtb.

I'm just confused as to what you mean by a "proper dtb" in that case.

If you just mean one with memory nodes hacked in, then that would
currently be a way to make that work, yes.

It seems like the better option is to fix the BE kernel to support a
UEFI memory map, as that solves other issues.

> 
> >In my mind, the only valid reason to look at that bit is so that
> >bootloaders can provide a warning if the CPU does not implement that
> >endianness.
> >
> >The issue I mention above should be solved by changes to the BE kernel.
> >
> >>>>>Is it possible to modify uefi memmap for kdump case?
> >>>>
> >>>>Technically it would be possible, however I don't think it's necessary,
> >>>>and I think it would be disadvantageous to do so.
> >>>>
> >>>>Describing the range(s) the crash kernel can use in separate properties
> >>>>under /chosen has a number of advantages.
> >>>
> >>>Ok, I got the points. We have a is_kdump_kernel() by checking if there is
> >>>elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
> >>>do not work well in kdump kernel some uncertain reasons. But ideally I
> >>>think kernel should handle things just like in 1st kernel and avoid to use
> >>>it.
> >>
> >>So I'm not still sure about what are advantages of a property under /chosen
> >>over "memmap=" kernel parameter.
> >>Both are simple and can have the same effect with minimizing changes to dtb.
> >>(But if, in the latter case, we have to provide *all* the memory-related information
> >>through "memmap=" parameters, it would be much complicated.)
> >
> >The reason I prefer a property over command line additions include:
> 
> Take some examples:
> (a) a property under /chosen
>   {
>     chosen = {
>       cmdline = "elfcorehdr=AA@BB maxcpus=1 ...";
>     }
>     usable-memory = <XX YY>;
>     memory {
>       ...
>     }
>   }
> 
> (b) a kernel command line parameter
>   (I use the same name, "usable-memory", to show the similarity. may use another name though.)
>   {
>     chosen = {
>       cmdline = "elfcorehdr=AA@BB maxcpus=1 usable-memory=YY@XX ...";
>     }
>     memory {
>       ...
>     }
>   }
> 
> >* It keeps the command line simple (as you mention the opposite is
> >   "complicated").
> 
> I think both are simple.
> 
> >* It is logically separate from options the user may pass to the kernel
> >   in that the restricted region(s) of memory available are effectively
> >   properties of the system (in that the crashed OS is part of the system
> >   state).
> 
> "elfcorehdr=" parameter already breaks your point.
> "elfcorehdr=" looks to be, what you say, a system property, and is actually
> added by kexec-tools on all architectures, and "usable-memory", whether it is
> a DT property or a kernel parameter, will also be added by kexec-tools.
> (Users don't have to care.)

Just because architectures do one thing today does not mean that we have
to follow it.

I don't think that breaks my point so much as shows that a different
approach is taken by others today.

There's also no reason this cannot be a property under /chosen.

> >* The semantics of the command line parsing can change subtly over time
> >   (for example, see 51e158c12aca3c9a, which terminates command line
> >   parsing at "--"). Making sure that a command line option will
> >   actually be parsed by the next kernel is not trivial.
> >
> >   Keeping this information isolated from the command line is more
> >   robust.
> 
> Even so, who wants to use kdump without testing?
> and this is not a kdump specific issue.
> 
> >* Addition of a property is a self-contained operation, that doesn't
> >   require any knowledge about the command line.
> 
> I don't get your point here.
> For a kernel parameter, early_param() can encapsulate all the stuffs necessary.
> Once the kernel recognizes a usable memory region, limiting available
> memory should be done in the exact same way.

I mean when modifying the command line.

To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
line, one needs to know where it is valid to place it. Appending doesn't
always work, as per the example above with 51e158c12aca3c9a.

For both of these my point was that generally there is some fragility in
this area. While it's easy to say that breaking this would be someone
else's fault, we can easily avoid the possibility of that happening, and
avoid a set of problems trying to maintain backwards compatibility if
there were a sensible change that happened to break things.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-01-20  2:56       ` Dave Young
@ 2016-01-21 12:11         ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-21 12:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 20, 2016 at 10:56:21AM +0800, Dave Young wrote:
> On 01/19/16 at 04:15pm, Geoff Levand wrote:
> > On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> > > Geoff, another question about kexec-tools part is, can the kexec
> > > -tools code
> > > been written in kernel? We have the infrastructure for kexec_file_load.
> > 
> > I see no technical reason why the arm64 kernel cannot support
> > kexec_file_load.
> 
> Cool, care to port it to kernel so that we have kexec_file_load only in arm64
> we do not need to support both kexec_load and kexec_file_load?

I have reasons for wanting kexec_load, even if we have kexec_file_load.
For example, being able to test modified DTBs during development, and
being able to kexec to non-Linux OSs.

I don't think we should drop kexec_load, as kexec_file_load is designed
such that it cannot be used for these purposes, while kexec_load is more
general.

I appreciate that we will need kexec_file_load for Secure Boot, and it
would be perfectly fine for your kdump userspace to require and always
use kexec_file_load.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-01-21 12:11         ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-21 12:11 UTC (permalink / raw)
  To: Dave Young
  Cc: Geoff Levand, Catalin Marinas, Will Deacon, AKASHI Takahiro,
	James Morse, christoffer.dall, marc.zyngier, kexec,
	linux-arm-kernel

On Wed, Jan 20, 2016 at 10:56:21AM +0800, Dave Young wrote:
> On 01/19/16 at 04:15pm, Geoff Levand wrote:
> > On Tue, 2016-01-19 at 20:32 +0800, Dave Young wrote:
> > > Geoff, another question about kexec-tools part is, can the kexec
> > > -tools code
> > > been written in kernel? We have the infrastructure for kexec_file_load.
> > 
> > I see no technical reason why the arm64 kernel cannot support
> > kexec_file_load.
> 
> Cool, care to port it to kernel so that we have kexec_file_load only in arm64
> we do not need to support both kexec_load and kexec_file_load?

I have reasons for wanting kexec_load, even if we have kexec_file_load.
For example, being able to test modified DTBs during development, and
being able to kexec to non-Linux OSs.

I don't think we should drop kexec_load, as kexec_file_load is designed
such that it cannot be used for these purposes, while kexec_load is more
general.

I appreciate that we will need kexec_file_load for Secure Boot, and it
would be perfectly fine for your kdump userspace to require and always
use kexec_file_load.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-21  5:43                             ` AKASHI Takahiro
@ 2016-01-21 13:02                               ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-21 13:02 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jan 21, 2016 at 02:43:15PM +0900, AKASHI Takahiro wrote:
> On 01/20/2016 11:59 PM, Ard Biesheuvel wrote:
> >On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
> >>Ard, Ganapatrao, the below is something we need to consider for the
> >>combination of the NUMA & kexec approaches. It only becomes a problem
> >>if/when we preserve DT memory nodes in the presence of EFI, though it
> >>would be nice to not box ourselves into a corner.
> >>
> >>On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
> >>>On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> >>>>On 01/19/2016 11:01 PM, Mark Rutland wrote:
> >>>>>For NUMA topology in !ACPI kernels, we might need to also retain and
> >>>>>parse memory nodes, but only for topology information. The kernel would
> >>>>>still only use memory as described by the EFI memory map.
> >>>>>
> >>>>>There's a horrible edge case I've spotted if performing a chain of
> >>>>>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>>>>respect the EFI memory map so as to avoid corrupting it for the
> >>>>>subsequent LE kernel. Other than this I believe everything should just
> >>>>>work.
> >>>>
> >>>>BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> >>>>for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> >>>>(as in the case of LE -> LE) and require users to provide a dtb file explicitly.
> >>>
> >>>As I mentioned above, the problem exists when memory nodes also exist
> >>>(for describing NUMA topology). In that case the BE kernel would try to
> >>>use the information from the memory nodes.
> >>>
> >>>>For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
> >>>>and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
> >>>
> >>>See above. The problem I imagine is:
> >>>
> >>>LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>>
> >>>     v       kexec
> >>>
> >>>BE kernel - uses DT memory nodes
> >>>           - clobbers EFI runtime regions as it sees them as available
> >>>
> >>>     v       kexec
> >>>
> >>>LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>>           - tries to call EFI runtime services, and explodes.
> >>
> >>I'm not really sure what the best approach is here, but I thought that
> >>it would be good to raise awareness of the edge-case.
> >>
> >
> >I think we should simply allow the BE kernel to deal with a UEFI
> >memory map. It only involves a bit of byte swapping (which I already
> >implemented at some point)
> 
> Just from my curiosity,
> will runtime services be also available on BE kernel with LE uefi?

It may be possible to implement that (I recall that Ard had a go), but
that's far more complicated than simply supporting the EFI memory map,
as you need separate (endian-swapped) page tables and other data
structures, lose the ability to handle exceptions, etc.

All that's suggested above is supporting the memory map.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-21 13:02                               ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-21 13:02 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Ard Biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	Marc Zyngier, James Morse, linux-arm-kernel, Ganapatrao Kulkarni,
	Dave Young, kexec, Christoffer Dall

On Thu, Jan 21, 2016 at 02:43:15PM +0900, AKASHI Takahiro wrote:
> On 01/20/2016 11:59 PM, Ard Biesheuvel wrote:
> >On 20 January 2016 at 13:36, Mark Rutland <mark.rutland@arm.com> wrote:
> >>Ard, Ganapatrao, the below is something we need to consider for the
> >>combination of the NUMA & kexec approaches. It only becomes a problem
> >>if/when we preserve DT memory nodes in the presence of EFI, though it
> >>would be nice to not box ourselves into a corner.
> >>
> >>On Wed, Jan 20, 2016 at 12:02:58PM +0000, Mark Rutland wrote:
> >>>On Wed, Jan 20, 2016 at 02:25:07PM +0900, AKASHI Takahiro wrote:
> >>>>On 01/19/2016 11:01 PM, Mark Rutland wrote:
> >>>>>For NUMA topology in !ACPI kernels, we might need to also retain and
> >>>>>parse memory nodes, but only for topology information. The kernel would
> >>>>>still only use memory as described by the EFI memory map.
> >>>>>
> >>>>>There's a horrible edge case I've spotted if performing a chain of
> >>>>>cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
> >>>>>respect the EFI memory map so as to avoid corrupting it for the
> >>>>>subsequent LE kernel. Other than this I believe everything should just
> >>>>>work.
> >>>>
> >>>>BE kernel doesn't support UEFI yet and cannot access UEFI memmap table. So,
> >>>>for LE -> BE, we don't use a dtb generated from /sys/firmware/fdt (or /proc/device-tree)
> >>>>(as in the case of LE -> LE) and require users to provide a dtb file explicitly.
> >>>
> >>>As I mentioned above, the problem exists when memory nodes also exist
> >>>(for describing NUMA topology). In that case the BE kernel would try to
> >>>use the information from the memory nodes.
> >>>
> >>>>For BE -> LE, BE kernel doesn't know whether UEFI memmap table is available or not
> >>>>and so use the same (explicitly-provided) dtb (as LE -> LE in !UEFI)
> >>>
> >>>See above. The problem I imagine is:
> >>>
> >>>LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>>
> >>>     v       kexec
> >>>
> >>>BE kernel - uses DT memory nodes
> >>>           - clobbers EFI runtime regions as it sees them as available
> >>>
> >>>     v       kexec
> >>>
> >>>LE kernel - uses EFI mmap, takes NUMA information from DT memory nodes
> >>>           - tries to call EFI runtime services, and explodes.
> >>
> >>I'm not really sure what the best approach is here, but I thought that
> >>it would be good to raise awareness of the edge-case.
> >>
> >
> >I think we should simply allow the BE kernel to deal with a UEFI
> >memory map. It only involves a bit of byte swapping (which I already
> >implemented at some point)
> 
> Just from my curiosity,
> will runtime services be also available on BE kernel with LE uefi?

It may be possible to implement that (I recall that Ard had a go), but
that's far more complicated than simply supporting the EFI memory map,
as you need separate (endian-swapped) page tables and other data
structures, lose the ability to handle exceptions, etc.

All that's suggested above is supporting the memory map.

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 16/19] arm64: kdump: add kdump support
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-21 14:17     ` James Morse
  -1 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-21 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi!

On 15/01/16 19:18, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> On crash dump kernel, all the information about primary kernel's core
> image is available in elf core header specified by "elfcorehdr=" boot
> parameter. reserve_elfcorehdr() will set aside the region to avoid any
> corruption by crash dump kernel.
> 
> Crash dump kernel will access the system memory of primary kernel via
> copy_oldmem_page(), which reads one page by ioremap'ing it since it does
> not reside in linear mapping on crash dump kernel.
> Please note that we should add "mem=X[MG]" boot parameter to limit the
> memory size and avoid the following assertion at ioremap():
> 	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
> 		return NULL;
> when accessing any pages beyond the usable memories of crash dump kernel.
> 
> We also need our own elfcorehdr_read() here since the weak definition of
> elfcorehdr_read() utilizes copy_oldmem_page() and will hit the assertion
> above on arm64.

How does this work for a different-endian crash kernel?

It looks like fs/proc/vmcore.c:parse_crash_elf64_headers() does some
validation on the elf-headers that doesn't look like it will work if
elfcorehdr:endian != kernel:endian.

Is it worth preventing this corner case by getting kexec-tools to
prevent linux with a different endianness being loaded as a kdump kernel?


Thanks,

James

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 16/19] arm64: kdump: add kdump support
@ 2016-01-21 14:17     ` James Morse
  0 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-21 14:17 UTC (permalink / raw)
  To: Geoff Levand, AKASHI Takahiro
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	linux-arm-kernel, kexec, christoffer.dall

Hi!

On 15/01/16 19:18, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> On crash dump kernel, all the information about primary kernel's core
> image is available in elf core header specified by "elfcorehdr=" boot
> parameter. reserve_elfcorehdr() will set aside the region to avoid any
> corruption by crash dump kernel.
> 
> Crash dump kernel will access the system memory of primary kernel via
> copy_oldmem_page(), which reads one page by ioremap'ing it since it does
> not reside in linear mapping on crash dump kernel.
> Please note that we should add "mem=X[MG]" boot parameter to limit the
> memory size and avoid the following assertion at ioremap():
> 	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
> 		return NULL;
> when accessing any pages beyond the usable memories of crash dump kernel.
> 
> We also need our own elfcorehdr_read() here since the weak definition of
> elfcorehdr_read() utilizes copy_oldmem_page() and will hit the assertion
> above on arm64.

How does this work for a different-endian crash kernel?

It looks like fs/proc/vmcore.c:parse_crash_elf64_headers() does some
validation on the elf-headers that doesn't look like it will work if
elfcorehdr:endian != kernel:endian.

Is it worth preventing this corner case by getting kexec-tools to
prevent linux with a different endianness being loaded as a kdump kernel?


Thanks,

James



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 16/19] arm64: kdump: add kdump support
  2016-01-21 14:17     ` James Morse
@ 2016-01-22  4:50       ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-22  4:50 UTC (permalink / raw)
  To: linux-arm-kernel

James,

On 01/21/2016 11:17 PM, James Morse wrote:
> Hi!
>
> On 15/01/16 19:18, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> On crash dump kernel, all the information about primary kernel's core
>> image is available in elf core header specified by "elfcorehdr=" boot
>> parameter. reserve_elfcorehdr() will set aside the region to avoid any
>> corruption by crash dump kernel.
>>
>> Crash dump kernel will access the system memory of primary kernel via
>> copy_oldmem_page(), which reads one page by ioremap'ing it since it does
>> not reside in linear mapping on crash dump kernel.
>> Please note that we should add "mem=X[MG]" boot parameter to limit the
>> memory size and avoid the following assertion at ioremap():
>> 	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
>> 		return NULL;
>> when accessing any pages beyond the usable memories of crash dump kernel.
>>
>> We also need our own elfcorehdr_read() here since the weak definition of
>> elfcorehdr_read() utilizes copy_oldmem_page() and will hit the assertion
>> above on arm64.
>
> How does this work for a different-endian crash kernel?
>
> It looks like fs/proc/vmcore.c:parse_crash_elf64_headers() does some
> validation on the elf-headers that doesn't look like it will work if
> elfcorehdr:endian != kernel:endian.

Good point!

> Is it worth preventing this corner case by getting kexec-tools to
> prevent linux with a different endianness being loaded as a kdump kernel?

It may be, but I think that people should always check whether or not kdump
will work in their environment before putting the system in practical use.

(I know the check costs almost nothing though.)

Thanks,
-Takahiro AKASHI

>
> Thanks,
>
> James
>
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 16/19] arm64: kdump: add kdump support
@ 2016-01-22  4:50       ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-22  4:50 UTC (permalink / raw)
  To: James Morse, Geoff Levand
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	linux-arm-kernel, kexec, christoffer.dall

James,

On 01/21/2016 11:17 PM, James Morse wrote:
> Hi!
>
> On 15/01/16 19:18, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> On crash dump kernel, all the information about primary kernel's core
>> image is available in elf core header specified by "elfcorehdr=" boot
>> parameter. reserve_elfcorehdr() will set aside the region to avoid any
>> corruption by crash dump kernel.
>>
>> Crash dump kernel will access the system memory of primary kernel via
>> copy_oldmem_page(), which reads one page by ioremap'ing it since it does
>> not reside in linear mapping on crash dump kernel.
>> Please note that we should add "mem=X[MG]" boot parameter to limit the
>> memory size and avoid the following assertion at ioremap():
>> 	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
>> 		return NULL;
>> when accessing any pages beyond the usable memories of crash dump kernel.
>>
>> We also need our own elfcorehdr_read() here since the weak definition of
>> elfcorehdr_read() utilizes copy_oldmem_page() and will hit the assertion
>> above on arm64.
>
> How does this work for a different-endian crash kernel?
>
> It looks like fs/proc/vmcore.c:parse_crash_elf64_headers() does some
> validation on the elf-headers that doesn't look like it will work if
> elfcorehdr:endian != kernel:endian.

Good point!

> Is it worth preventing this corner case by getting kexec-tools to
> prevent linux with a different endianness being loaded as a kdump kernel?

It may be, but I think that people should always check whether or not kdump
will work in their environment before putting the system in practical use.

(I know the check costs almost nothing though.)

Thanks,
-Takahiro AKASHI

>
> Thanks,
>
> James
>
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-21 12:02                             ` Mark Rutland
@ 2016-01-22  6:23                               ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-22  6:23 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/21/2016 09:02 PM, Mark Rutland wrote:
> On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
>> On 01/20/2016 08:49 PM, Mark Rutland wrote:
>>> On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
>>>> On 01/20/2016 11:49 AM, Dave Young wrote:
>>>>> On 01/19/16 at 02:01pm, Mark Rutland wrote:
>>>>>> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>>>>>>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>>>>>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>>>>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>>>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>>>>>>> entries.
>>>>>>>>>>
>>>>>>>>>> Thanks. I will visit x86 code again.
>>>>>>>>>>
>>>>>>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>>>>>>> better.
>>>>>>>>>>
>>>>>>>>>> Do you think so?
>>>>>>>>>
>>>>>>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>>>>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>>>>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>>>>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>>>>>>
>>>>>>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>>>>>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>>>>>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>>>>>>
>>>>>>> Here is the old patch which was rejected in kexec-tools:
>>>>>>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>>>>>>
>>>>>>>>
>>>>>>>> If the kernel got the rest of its system topology from DT, the PCI
>>>>>>>> regions would be described there.
>>>>>>>
>>>>>>> Yes, if kdump kernel use same DT as 1st kernel.
>>>>>>
>>>>>> Other than for testing purposes, I don't see why you'd pass the kdump
>>>>>> kernel a DTB inconsistent with that the 1st kernel was passed (other
>>>>>> than some properties under /chosen).
>>>>>>
>>>>>> We added /sys/firmware/fdt specifically to allow the kexec tools to get
>>>>>> the exact DTB the first kernel used. There's no reason for tools to have
>>>>>> to make something up.
>>>>>
>>>>> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
>>>>> how one will use it unless dropping the option and use /sys/firmware/fdt
>>>>> unconditionally.
>>>>
>>>> As a matter of fact, specifying proper command line parameters as well as
>>>> dtb is partly users' responsibility for kdump to work correctly.
>>>> (especially for BE kernel)
>>>>
>>>>> If we choose to implement kexec_file_load only in kernel, the interfaces
>>>>> provided are kernel, initrd and cmdline. We can always use same dtb.
>>>>
>>>> I would say that we can always use the same dtb even with kexec_load
>>>> from user's perspective. Right?
>>>
>>> No.
>>>
>>> This breaks using kexec for boot-loader purposes, and imposes a policy.
>>
>> What kind of policy?
>> I said "can", but if we want to use other setting/configuration, we can
>> still have a full control.
>
> Apologies, I misunderstood.
>
> In most cases, using the existing DTB (with minor modifications to
> /chosen for bootargs and such) is fine. If the user just wants to boot
> another Linux kernel, that's generally what they should do.
>
> I think we're agreed on that.

Yes.

> However, there are cases when the user may want to use a different DTB,
> or use a different purgatory. So we cannot mandate that the existing DTB
> is reused, nor that an in-kernel purgatory must be used, as that imposes
> a policy.

Agree!

>>> For better or worse kexec_file_load has always imposed a constrained
>>> Linux-only policy, so that's a different story.
>>>
>>>>>> There's a horrible edge case I've spotted if performing a chain of
>>>>>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>>>>>> respect the EFI memory map so as to avoid corrupting it for the
>>>>>> subsequent LE kernel. Other than this I believe everything should just
>>>>>> work.
>>>>>
>>>>> Firmware do not know kernel endianness, kernel should respect firmware
>>>>> maps and adapt to it, it sounds like a generic issue not specific to kexec.
>>>>
>>>> On arm64, a kernel image header has a bit field to specify the image's endianness.
>>>> Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
>>>
>>> The firmware should _never_ care about the kernel's endianness. The
>>> bootloader or first kernel shouldn't care about the next kernel's
>>> endianness apart from in exceptional circumstances. The DTB for a LE
>>> kernel should look identical to that passed to a BE kernel.
>>
>> Please note that I didn't say anything different from your last two statements.
>> The current arm64 kexec implementation doesn't do anything specific to BE,
>> but as far as BE kernel doesn't support UEFI, users are responsible for
>> providing a proper dtb.
>
> I'm just confused as to what you mean by a "proper dtb" in that case.
>
> If you just mean one with memory nodes hacked in, then that would
> currently be a way to make that work, yes.

One of useful cases that I have in my mind is kdump.
We may want to use a small sub-set of dtb, especially devices, to
make the reboot more reliable. Device drivers are likely to be vulnerable
at crash.

> It seems like the better option is to fix the BE kernel to support a
> UEFI memory map, as that solves other issues.

Why did Ard throw away his patch?

>>
>>> In my mind, the only valid reason to look at that bit is so that
>>> bootloaders can provide a warning if the CPU does not implement that
>>> endianness.
>>>
>>> The issue I mention above should be solved by changes to the BE kernel.
>>>
>>>>>>> Is it possible to modify uefi memmap for kdump case?
>>>>>>
>>>>>> Technically it would be possible, however I don't think it's necessary,
>>>>>> and I think it would be disadvantageous to do so.
>>>>>>
>>>>>> Describing the range(s) the crash kernel can use in separate properties
>>>>>> under /chosen has a number of advantages.
>>>>>
>>>>> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
>>>>> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
>>>>> do not work well in kdump kernel some uncertain reasons. But ideally I
>>>>> think kernel should handle things just like in 1st kernel and avoid to use
>>>>> it.
>>>>
>>>> So I'm not still sure about what are advantages of a property under /chosen
>>>> over "memmap=" kernel parameter.
>>>> Both are simple and can have the same effect with minimizing changes to dtb.
>>>> (But if, in the latter case, we have to provide *all* the memory-related information
>>>> through "memmap=" parameters, it would be much complicated.)
>>>
>>> The reason I prefer a property over command line additions include:
>>
>> Take some examples:
>> (a) a property under /chosen
>>    {
>>      chosen = {
>>        cmdline = "elfcorehdr=AA@BB maxcpus=1 ...";
>>      }
>>      usable-memory = <XX YY>;
>>      memory {
>>        ...
>>      }
>>    }
>>
>> (b) a kernel command line parameter
>>    (I use the same name, "usable-memory", to show the similarity. may use another name though.)
>>    {
>>      chosen = {
>>        cmdline = "elfcorehdr=AA@BB maxcpus=1 usable-memory=YY@XX ...";
>>      }
>>      memory {
>>        ...
>>      }
>>    }
>>
>>> * It keeps the command line simple (as you mention the opposite is
>>>    "complicated").
>>
>> I think both are simple.
>>
>>> * It is logically separate from options the user may pass to the kernel
>>>    in that the restricted region(s) of memory available are effectively
>>>    properties of the system (in that the crashed OS is part of the system
>>>    state).
>>
>> "elfcorehdr=" parameter already breaks your point.
>> "elfcorehdr=" looks to be, what you say, a system property, and is actually
>> added by kexec-tools on all architectures, and "usable-memory", whether it is
>> a DT property or a kernel parameter, will also be added by kexec-tools.
>> (Users don't have to care.)
>
> Just because architectures do one thing today does not mean that we have
> to follow it.
>
> I don't think that breaks my point so much as shows that a different
> approach is taken by others today.
>
> There's also no reason this cannot be a property under /chosen.

No, but no strong reason to be so IMO.

>>> * The semantics of the command line parsing can change subtly over time
>>>    (for example, see 51e158c12aca3c9a, which terminates command line
>>>    parsing at "--"). Making sure that a command line option will
>>>    actually be parsed by the next kernel is not trivial.
>>>
>>>    Keeping this information isolated from the command line is more
>>>    robust.
>>
>> Even so, who wants to use kdump without testing?
>> and this is not a kdump specific issue.
>>
>>> * Addition of a property is a self-contained operation, that doesn't
>>>    require any knowledge about the command line.
>>
>> I don't get your point here.
>> For a kernel parameter, early_param() can encapsulate all the stuffs necessary.
>> Once the kernel recognizes a usable memory region, limiting available
>> memory should be done in the exact same way.
>
> I mean when modifying the command line.

OK, I understand what you mean.

> To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
> line, one needs to know where it is valid to place it. Appending doesn't
> always work, as per the example above with 51e158c12aca3c9a.

So, are you now suggesting that we put both "elfcorehdr=" and
"usable-memory=" under /chosen in dtb? That's fair enough.
(as far as nobody cares about incompatibility with other archs.)

-Takahiro AKASHI


> For both of these my point was that generally there is some fragility in
> this area. While it's easy to say that breaking this would be someone
> else's fault, we can easily avoid the possibility of that happening, and
> avoid a set of problems trying to maintain backwards compatibility if
> there were a sensible change that happened to break things.
>
> Thanks,
> Mark.
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-22  6:23                               ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-22  6:23 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

On 01/21/2016 09:02 PM, Mark Rutland wrote:
> On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
>> On 01/20/2016 08:49 PM, Mark Rutland wrote:
>>> On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
>>>> On 01/20/2016 11:49 AM, Dave Young wrote:
>>>>> On 01/19/16 at 02:01pm, Mark Rutland wrote:
>>>>>> On Tue, Jan 19, 2016 at 09:45:53PM +0800, Dave Young wrote:
>>>>>>> On 01/19/16 at 12:51pm, Mark Rutland wrote:
>>>>>>>> On Tue, Jan 19, 2016 at 08:28:48PM +0800, Dave Young wrote:
>>>>>>>>> On 01/19/16 at 02:35pm, AKASHI Takahiro wrote:
>>>>>>>>>> On 01/19/2016 10:43 AM, Dave Young wrote:
>>>>>>>>>>> X86 takes another way in latest kexec-tools and kexec_file_load, that is
>>>>>>>>>>> recreating E820 table and pass it to kexec/kdump kernel, if the entries
>>>>>>>>>>> are over E820 limitation then turn to use setup_data list for remain
>>>>>>>>>>> entries.
>>>>>>>>>>
>>>>>>>>>> Thanks. I will visit x86 code again.
>>>>>>>>>>
>>>>>>>>>>> I think it is X86 specific. Personally I think device tree property is
>>>>>>>>>>> better.
>>>>>>>>>>
>>>>>>>>>> Do you think so?
>>>>>>>>>
>>>>>>>>> I'm not sure it is the best way. For X86 we run into problem with
>>>>>>>>> memmap= design, one example is pci domain X (X>1) need the pci memory
>>>>>>>>> ranges being passed to kdump kernel. When we passed reserved ranges in /proc/iomem
>>>>>>>>> to 2nd kernel we find that cmdline[] array is not big enough.
>>>>>>>>
>>>>>>>> I'm not sure how PCI ranges relate to the memory map used for normal
>>>>>>>> memory (i.e. RAM), though I'm probably missing some caveat with the way
>>>>>>>> ACPI and UEFI describe PCI. Why does memmap= affect PCI memory?
>>>>>>>
>>>>>>> Here is the old patch which was rejected in kexec-tools:
>>>>>>> http://lists.infradead.org/pipermail/kexec/2013-February/007924.html
>>>>>>>
>>>>>>>>
>>>>>>>> If the kernel got the rest of its system topology from DT, the PCI
>>>>>>>> regions would be described there.
>>>>>>>
>>>>>>> Yes, if kdump kernel use same DT as 1st kernel.
>>>>>>
>>>>>> Other than for testing purposes, I don't see why you'd pass the kdump
>>>>>> kernel a DTB inconsistent with that the 1st kernel was passed (other
>>>>>> than some properties under /chosen).
>>>>>>
>>>>>> We added /sys/firmware/fdt specifically to allow the kexec tools to get
>>>>>> the exact DTB the first kernel used. There's no reason for tools to have
>>>>>> to make something up.
>>>>>
>>>>> Agreed, but kexec-tools has an option to pass in any dtb files. Who knows
>>>>> how one will use it unless dropping the option and use /sys/firmware/fdt
>>>>> unconditionally.
>>>>
>>>> As a matter of fact, specifying proper command line parameters as well as
>>>> dtb is partly users' responsibility for kdump to work correctly.
>>>> (especially for BE kernel)
>>>>
>>>>> If we choose to implement kexec_file_load only in kernel, the interfaces
>>>>> provided are kernel, initrd and cmdline. We can always use same dtb.
>>>>
>>>> I would say that we can always use the same dtb even with kexec_load
>>>> from user's perspective. Right?
>>>
>>> No.
>>>
>>> This breaks using kexec for boot-loader purposes, and imposes a policy.
>>
>> What kind of policy?
>> I said "can", but if we want to use other setting/configuration, we can
>> still have a full control.
>
> Apologies, I misunderstood.
>
> In most cases, using the existing DTB (with minor modifications to
> /chosen for bootargs and such) is fine. If the user just wants to boot
> another Linux kernel, that's generally what they should do.
>
> I think we're agreed on that.

Yes.

> However, there are cases when the user may want to use a different DTB,
> or use a different purgatory. So we cannot mandate that the existing DTB
> is reused, nor that an in-kernel purgatory must be used, as that imposes
> a policy.

Agree!

>>> For better or worse kexec_file_load has always imposed a constrained
>>> Linux-only policy, so that's a different story.
>>>
>>>>>> There's a horrible edge case I've spotted if performing a chain of
>>>>>> cross-endian kexecs: LE -> BE -> LE, as the BE kernel would have to
>>>>>> respect the EFI memory map so as to avoid corrupting it for the
>>>>>> subsequent LE kernel. Other than this I believe everything should just
>>>>>> work.
>>>>>
>>>>> Firmware do not know kernel endianness, kernel should respect firmware
>>>>> maps and adapt to it, it sounds like a generic issue not specific to kexec.
>>>>
>>>> On arm64, a kernel image header has a bit field to specify the image's endianness.
>>>> Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
>>>
>>> The firmware should _never_ care about the kernel's endianness. The
>>> bootloader or first kernel shouldn't care about the next kernel's
>>> endianness apart from in exceptional circumstances. The DTB for a LE
>>> kernel should look identical to that passed to a BE kernel.
>>
>> Please note that I didn't say anything different from your last two statements.
>> The current arm64 kexec implementation doesn't do anything specific to BE,
>> but as far as BE kernel doesn't support UEFI, users are responsible for
>> providing a proper dtb.
>
> I'm just confused as to what you mean by a "proper dtb" in that case.
>
> If you just mean one with memory nodes hacked in, then that would
> currently be a way to make that work, yes.

One of useful cases that I have in my mind is kdump.
We may want to use a small sub-set of dtb, especially devices, to
make the reboot more reliable. Device drivers are likely to be vulnerable
at crash.

> It seems like the better option is to fix the BE kernel to support a
> UEFI memory map, as that solves other issues.

Why did Ard throw away his patch?

>>
>>> In my mind, the only valid reason to look at that bit is so that
>>> bootloaders can provide a warning if the CPU does not implement that
>>> endianness.
>>>
>>> The issue I mention above should be solved by changes to the BE kernel.
>>>
>>>>>>> Is it possible to modify uefi memmap for kdump case?
>>>>>>
>>>>>> Technically it would be possible, however I don't think it's necessary,
>>>>>> and I think it would be disadvantageous to do so.
>>>>>>
>>>>>> Describing the range(s) the crash kernel can use in separate properties
>>>>>> under /chosen has a number of advantages.
>>>>>
>>>>> Ok, I got the points. We have a is_kdump_kernel() by checking if there is
>>>>> elfcorehdr_addr kernel cmdline. This is mainly for some drivers which
>>>>> do not work well in kdump kernel for some uncertain reasons. But ideally I
>>>>> think kernel should handle things just like in 1st kernel and avoid to use
>>>>> it.
>>>>
>>>> So I'm not still sure about what are advantages of a property under /chosen
>>>> over "memmap=" kernel parameter.
>>>> Both are simple and can have the same effect with minimizing changes to dtb.
>>>> (But if, in the latter case, we have to provide *all* the memory-related information
>>>> through "memmap=" parameters, it would be much complicated.)
>>>
>>> The reason I prefer a property over command line additions include:
>>
>> Take some examples:
>> (a) a property under /chosen
>>    {
>>      chosen = {
>>        cmdline = "elfcorehdr=AA@BB maxcpus=1 ...";
>>      }
>>      usable-memory = <XX YY>;
>>      memory {
>>        ...
>>      }
>>    }
>>
>> (b) a kernel command line parameter
>>    (I use the same name, "usable-memory", to show the similarity. may use another name though.)
>>    {
>>      chosen = {
>>        cmdline = "elfcorehdr=AA@BB maxcpus=1 usable-memory=YY@XX ...";
>>      }
>>      memory {
>>        ...
>>      }
>>    }
>>
>>> * It keeps the command line simple (as you mention the opposite is
>>>    "complicated").
>>
>> I think both are simple.
>>
>>> * It is logically separate from options the user may pass to the kernel
>>>    in that the restricted region(s) of memory available are effectively
>>>    properties of the system (in that the crashed OS is part of the system
>>>    state).
>>
>> "elfcorehdr=" parameter already breaks your point.
>> "elfcorehdr=" looks to be, what you say, a system property, and is actually
>> added by kexec-tools on all architectures, and "usable-memory", whether it is
>> a DT property or a kernel parameter, will also be added by kexec-tools.
>> (Users don't have to care.)
>
> Just because architectures do one thing today does not mean that we have
> to follow it.
>
> I don't think that breaks my point so much as shows that a different
> approach is taken by others today.
>
> There's also no reason this cannot be a property under /chosen.

No, but no strong reason to be so IMO.

>>> * The semantics of the command line parsing can change subtly over time
>>>    (for example, see 51e158c12aca3c9a, which terminates command line
>>>    parsing at "--"). Making sure that a command line option will
>>>    actually be parsed by the next kernel is not trivial.
>>>
>>>    Keeping this information isolated from the command line is more
>>>    robust.
>>
>> Even so, who wants to use kdump without testing?
>> and this is not a kdump specific issue.
>>
>>> * Addition of a property is a self-contained operation, that doesn't
>>>    require any knowledge about the command line.
>>
>> I don't get your point here.
>> For a kernel parameter, early_param() can encapsulate all the stuffs necessary.
>> Once the kernel recognizes a usable memory region, limiting available
>> memory should be done in the exact same way.
>
> I mean when modifying the command line.

OK, I understand what you mean.

> To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
> line, one needs to know where it is valid to place it. Appending doesn't
> always work, as per the example above with 51e158c12aca3c9a.

So, are you now suggesting that we put both "elfcorehdr=" and
"usable-memory=" under /chosen in dtb? That's fair enough.
(as far as nobody cares about incompatibility with other archs.)

-Takahiro AKASHI


> For both of these my point was that generally there is some fragility in
> this area. While it's easy to say that breaking this would be someone
> else's fault, we can easily avoid the possibility of that happening, and
> avoid a set of problems trying to maintain backwards compatibility if
> there were a sensible change that happened to break things.
>
> Thanks,
> Mark.
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-22  6:23                               ` AKASHI Takahiro
@ 2016-01-22 11:13                                 ` Mark Rutland
  -1 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-22 11:13 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jan 22, 2016 at 03:23:14PM +0900, AKASHI Takahiro wrote:
> On 01/21/2016 09:02 PM, Mark Rutland wrote:
> >On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
> >>On 01/20/2016 08:49 PM, Mark Rutland wrote:
> >>>On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
> >>>>On 01/20/2016 11:49 AM, Dave Young wrote:
> >>>>>Firmware do not know kernel endianness, kernel should respect firmware
> >>>>>maps and adapt to it, it sounds like a generic issue not specific to kexec.
> >>>>
> >>>>On arm64, a kernel image header has a bit field to specify the image's endianness.
> >>>>Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
> >>>
> >>>The firmware should _never_ care about the kernel's endianness. The
> >>>bootloader or first kernel shouldn't care about the next kernel's
> >>>endianness apart from in exceptional circumstances. The DTB for a LE
> >>>kernel should look identical to that passed to a BE kernel.
> >>
> >>Please note that I didn't say anything different from your last two statements.
> >>The current arm64 kexec implementation doesn't do anything specific to BE,
> >>but as far as BE kernel doesn't support UEFI, users are responsible for
> >>providing a proper dtb.
> >
> >I'm just confused as to what you mean by a "proper dtb" in that case.
> >
> >If you just mean one with memory nodes hacked in, then that would
> >currently be a way to make that work, yes.
> 
> One of useful cases that I have in my mind is kdump.
> We may want to use a small sub-set of dtb, especially devices, to
> make the reboot more reliable. Device drivers are likely to be vulnerable
> at crash.

I don't think that we can reliably have userspace carve out devices from
the DTB or from ACPI tables in order to achieve that. That's going to
end up complex and/or incomplete. We also can't do this in the
kexec_load_file / Secure Boot case.

That's not to say we cannot try, as it's possible when using kexec_load.
However, it's only going to be possible on a subset of systems, and it
would probably make sense to reserve this approach to those cases we
cannot work around by other means (e.g. whitelisting "safe" devices in
the kdump kernel, forcing explicit resets, etc).

> >It seems like the better option is to fix the BE kernel to support a
> >UEFI memory map, as that solves other issues.
> 
> Why did Ard throw away his patch?

In the absence of kexec it wasn't necessary, it only supported a subset
of the runtime services (and no other features like DMI IIRC), and it
looked like it would be painful to debug (if something went wrong while
a CPU was in LE mode, we couldn't even panic()).

Given BE kernels on UEFI were never supported until that point, there
wasn't a compelling reason to support that case.

Even if we support the UEFI memory map, I don't think it's worth the
effort to support runtime services, ACPI, and related code that's only
ever been tested on LE. So realistically this would only work on systems
using UEFI && DT rather than UEFI && ACPI.

> So, are you now suggesting that we put both "elfcorehdr=" and
> "usable-memory=" under /chosen in dtb?

Yes.

> That's fair enough.  (as far as nobody cares about incompatibility
> with other archs.)

Glad to hear! :)

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-22 11:13                                 ` Mark Rutland
  0 siblings, 0 replies; 174+ messages in thread
From: Mark Rutland @ 2016-01-22 11:13 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

On Fri, Jan 22, 2016 at 03:23:14PM +0900, AKASHI Takahiro wrote:
> On 01/21/2016 09:02 PM, Mark Rutland wrote:
> >On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
> >>On 01/20/2016 08:49 PM, Mark Rutland wrote:
> >>>On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
> >>>>On 01/20/2016 11:49 AM, Dave Young wrote:
> >>>>>Firmware do not know kernel endianness, kernel should respect firmware
> >>>>>maps and adapt to it, it sounds like a generic issue not specific to kexec.
> >>>>
> >>>>On arm64, a kernel image header has a bit field to specify the image's endianness.
> >>>>Anyway, our current implementation relies on a user-supplied dtb to start BE kernel.
> >>>
> >>>The firmware should _never_ care about the kernel's endianness. The
> >>>bootloader or first kernel shouldn't care about the next kernel's
> >>>endianness apart from in exceptional circumstances. The DTB for a LE
> >>>kernel should look identical to that passed to a BE kernel.
> >>
> >>Please note that I didn't say anything different from your last two statements.
> >>The current arm64 kexec implementation doesn't do anything specific to BE,
> >>but as far as BE kernel doesn't support UEFI, users are responsible for
> >>providing a proper dtb.
> >
> >I'm just confused as to what you mean by a "proper dtb" in that case.
> >
> >If you just mean one with memory nodes hacked in, then that would
> >currently be a way to make that work, yes.
> 
> One of useful cases that I have in my mind is kdump.
> We may want to use a small sub-set of dtb, especially devices, to
> make the reboot more reliable. Device drivers are likely to be vulnerable
> at crash.

I don't think that we can reliably have userspace carve out devices from
the DTB or from ACPI tables in order to achieve that. That's going to
end up complex and/or incomplete. We also can't do this in the
kexec_load_file / Secure Boot case.

That's not to say we cannot try, as it's possible when using kexec_load.
However, it's only going to be possible on a subset of systems, and it
would probably make sense to reserve this approach to those cases we
cannot work around by other means (e.g. whitelisting "safe" devices in
the kdump kernel, forcing explicit resets, etc).

> >It seems like the better option is to fix the BE kernel to support a
> >UEFI memory map, as that solves other issues.
> 
> Why did Ard throw away his patch?

In the absence of kexec it wasn't necessary, it only supported a subset
of the runtime services (and no other features like DMI IIRC), and it
looked like it would be painful to debug (if something went wrong while
a CPU was in LE mode, we couldn't even panic()).

Given BE kernels on UEFI were never supported until that point, there
wasn't a compelling reason to support that case.

Even if we support the UEFI memory map, I don't think it's worth the
effort to support runtime services, ACPI, and related code that's only
ever been tested on LE. So realistically this would only work on systems
using UEFI && DT rather than UEFI && ACPI.

> So, are you now suggesting that we put both "elfcorehdr=" and
> "usable-memory=" under /chosen in dtb?

Yes.

> That's fair enough.  (as far as nobody cares about incompatibility
> with other archs.)

Glad to hear! :)

Thanks,
Mark.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-22  6:23                               ` AKASHI Takahiro
@ 2016-01-25  3:19                                 ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-25  3:19 UTC (permalink / raw)
  To: linux-arm-kernel

Hi, AKASHI

> >To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
> >line, one needs to know where it is valid to place it. Appending doesn't
> >always work, as per the example above with 51e158c12aca3c9a.
> 
> So, are you now suggesting that we put both "elfcorehdr=" and
> "usable-memory=" under /chosen in dtb? That's fair enough.
> (as far as nobody cares about incompatibility with other archs.)

You may need to turn is_kdump_kernel into a weak function so that on arm64 you
can still use it in the kdump kernel.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-25  3:19                                 ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-25  3:19 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, christoffer.dall, kexec,
	linux-arm-kernel

Hi, AKASHI

> >To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
> >line, one needs to know where it is valid to place it. Appending doesn't
> >always work, as per the example above with 51e158c12aca3c9a.
> 
> So, are you now suggesting that we put both "elfcorehdr=" and
> "usable-memory=" under /chosen in dtb? That's fair enough.
> (as far as nobody cares about incompatibility with other archs.)

You may need to turn is_kdump_kernel into a weak function so that on arm64 you
can still use it in the kdump kernel.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-25  3:19                                 ` Dave Young
@ 2016-01-25  4:23                                   ` Dave Young
  -1 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-25  4:23 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/25/16 at 11:19am, Dave Young wrote:
> Hi, AKASHI
> 
> > >To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
> > >line, one needs to know where it is valid to place it. Appending doesn't
> > >always work, as per the example above with 51e158c12aca3c9a.
> > 
> > So, are you now suggesting that we put both "elfcorehdr=" and
> > "usable-memory=" under /chosen in dtb? That's fair enough.
> > (as far as nobody cares about incompatibility with other archs.)
> 
> You may need move is_kdump_kernel as a weak function so that in arm64 you
> can still use it in kdump kernel.

Please ignore that comment: you just use a different way to set the variable, so
is_kdump_kernel will still work.

It is pretty fine to use dtb along with usable memories then.

Thanks
Dave

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-01-25  4:23                                   ` Dave Young
  0 siblings, 0 replies; 174+ messages in thread
From: Dave Young @ 2016-01-25  4:23 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, ard.biesheuvel, Geoff Levand, Catalin Marinas,
	Will Deacon, marc.zyngier, James Morse, linux-arm-kernel, kexec,
	christoffer.dall

On 01/25/16 at 11:19am, Dave Young wrote:
> Hi, AKASHI
> 
> > >To place "elfcorehdr=" or "memmap="/"usable-memory=" into the command
> > >line, one needs to know where it is valid to place it. Appending doesn't
> > >always work, as per the example above with 51e158c12aca3c9a.
> > 
> > So, are you now suggesting that we put both "elfcorehdr=" and
> > "usable-memory=" under /chosen in dtb? That's fair enough.
> > (as far as nobody cares about incompatibility with other archs.)
> 
> You may need move is_kdump_kernel as a weak function so that in arm64 you
> can still use it in kdump kernel.

Please ignore that comment: you just use a different way to set the variable, so
is_kdump_kernel will still work.

It is pretty fine to use dtb along with usable memories then.

Thanks
Dave

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 04/19] arm64: Cleanup SCTLR flags
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-25 15:09     ` James Morse
  -1 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-25 15:09 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On 15/01/16 19:18, Geoff Levand wrote:
> We currently have macros defining flags for the arm64 sctlr registers in both
> kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
> of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
> SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
> indicating a common flag, and fixup all files to include the proper header or
> to use the new macro names.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---

> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 178ba22..1d7e502 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -20,7 +20,7 @@
>  #include <asm/assembler.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> -#include <asm/pgtable-hwdef.h>

I think this one crept in... this header file is needed for the
definition of TCR_T0SZ_OFFSET and TCR_TxSZ_WIDTH.


> +#include <asm/sysreg.h>
>  
>  	.text
>  	.pushsection	.hyp.idmap.text, "ax"


Thanks,

James

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 04/19] arm64: Cleanup SCTLR flags
@ 2016-01-25 15:09     ` James Morse
  0 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-25 15:09 UTC (permalink / raw)
  To: Geoff Levand
  Cc: Mark Rutland, marc.zyngier, Catalin Marinas, Will Deacon,
	AKASHI Takahiro, linux-arm-kernel, kexec, christoffer.dall

Hi Geoff,

On 15/01/16 19:18, Geoff Levand wrote:
> We currently have macros defining flags for the arm64 sctlr registers in both
> kvm_arm.h and sysreg.h.  To clean things up and simplify move the definitions
> of the SCTLR_EL2 flags from kvm_arm.h to sysreg.h, rename any SCTLR_EL1 or
> SCTLR_EL2 flags that are common to both registers to be SCTLR_ELx, with 'x'
> indicating a common flag, and fixup all files to include the proper header or
> to use the new macro names.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---

> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 178ba22..1d7e502 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -20,7 +20,7 @@
>  #include <asm/assembler.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> -#include <asm/pgtable-hwdef.h>

I think this one crept in... this header file is needed for the
definition of TCR_T0SZ_OFFSET and TCR_TxSZ_WIDTH.


> +#include <asm/sysreg.h>
>  
>  	.text
>  	.pushsection	.hyp.idmap.text, "ax"


Thanks,

James



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug
  2016-01-15 19:18   ` Geoff Levand
@ 2016-01-26 17:42     ` James Morse
  -1 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-26 17:42 UTC (permalink / raw)
  To: linux-arm-kernel

Hi!

On 15/01/16 19:18, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> The current kvm implementation on arm64 does cpu-specific initialization
> at system boot, and has no way to gracefully shutdown a core in terms of
> kvm. This prevents, especially, kexec from rebooting the system on a boot
> core in EL2.
> 
> This patch adds a cpu tear-down function and also puts an existing cpu-init
> code into a separate function, kvm_arch_hardware_disable() and
> kvm_arch_hardware_enable() respectively.
> We don't need arm64-specific cpu hotplug hook any more.
> 
> Since this patch modifies common part of code between arm and arm64, one
> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
> compiling errors.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index e06fd29..e91f80e 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c

>  #ifdef CONFIG_CPU_PM
>  static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
>  				    unsigned long cmd,
>  				    void *v)
>  {
> -	if (cmd == CPU_PM_EXIT &&
> -	    __hyp_get_vectors() == hyp_default_vectors) {
> -		cpu_init_hyp_mode(NULL);
> +	switch (cmd) {
> +	case CPU_PM_ENTER:
> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
> +			cpu_reset_hyp_mode();
> +
>  		return NOTIFY_OK;
> -	}
> +	case CPU_PM_EXIT:
> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
> +			cpu_init_hyp_mode();

I read this as:
		if (enabled)
			enable();

What am I missing? Is there a missing '!'?

[/me thinks some more]

I suspect this is trying to be clever: leaving the flag set over a
deep-sleep, to indicate that the hardware should be re-enabled when we
resume... if so, a comment to that effect would be good.


>  
> -	return NOTIFY_DONE;
> +		return NOTIFY_OK;
> +
> +	default:
> +		return NOTIFY_DONE;
> +	}
>  }
>  
>  static struct notifier_block hyp_init_cpu_pm_nb = {

> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 3070096..bca79f9 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -58,9 +58,18 @@
>  
>  #define HVC_CALL_FUNC 3
>  
> +/*
> + * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
> + *
> + * @x0: entry address in trampoline code in va
> + * @x1: identical mapping page table in pa
> + */
> +
>  #define BOOT_CPU_MODE_EL1	(0xe11)
>  #define BOOT_CPU_MODE_EL2	(0xe12)
>  
> +#define HVC_RESET_CPU 4
> +

Patch 5 added a fancy new way to call arbitrary functions at el2, why
not use that? (it would save beating up el1_sync again).

I agree the trampoline stuff is complicated - I will try and cook-up a
version of this patch for hibernate that does this. (... and comment
what I think is happening above while I'm at it)


>  #ifndef __ASSEMBLY__
>  
>  /*
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 1d7e502..d909ce2 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -140,6 +140,39 @@ merged:
>  	eret
>  ENDPROC(__kvm_hyp_init)
>  
> +	/*
> +	 * x0: HYP boot pgd
> +	 * x1: HYP phys_idmap_start
> +	 */
> +ENTRY(__kvm_hyp_reset)
> +	/* We're in trampoline code in VA, switch back to boot page tables */
> +	msr	ttbr0_el2, x0
> +	isb
> +
> +	/* Invalidate the old TLBs */
> +	tlbi	alle2
> +	dsb	sy
> +
> +	/* Branch into PA space */
> +	adr	x0, 1f
> +	bfi	x1, x0, #0, #PAGE_SHIFT
> +	br	x1
> +
> +	/* We're now in idmap, disable MMU */
> +1:	mrs	x0, sctlr_el2
> +	ldr	x1, =SCTLR_ELx_FLAGS
> +	bic	x0, x0, x1		// Clear SCTL_M and etc
> +	msr	sctlr_el2, x0
> +	isb
> +
> +	/* Install stub vectors */
> +	adrp	x0, __hyp_stub_vectors
> +	add	x0, x0, #:lo12:__hyp_stub_vectors

adr_l ?

> +	msr	vbar_el2, x0
> +
> +	eret
> +ENDPROC(__kvm_hyp_reset)
> +
>  	.ltorg
>  
>  	.popsection
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 15b1ef9..ed82dc2 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -986,10 +991,27 @@ el1_sync:					// Guest trapped into EL2
>  	cmp	x18, #HVC_GET_VECTORS
>  	b.ne	1f
>  	mrs	x0, vbar_el2
> -	b	2f
> -
> -1:	/* Default to HVC_CALL_HYP. */
> +	b	do_eret
>  
> +	/* jump into trampoline code */
> +1:	cmp	x18, #HVC_RESET_CPU
> +	b.ne	2f
> +	/*
> +	 * Entry point is:
> +	 *	TRAMPOLINE_VA
> +	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
> +	 */
> +	adrp	x2, __kvm_hyp_reset
> +	add	x2, x2, #:lo12:__kvm_hyp_reset
> +	adrp	x3, __hyp_idmap_text_start
> +	add	x3, x3, #:lo12:__hyp_idmap_text_start

adr_l ?

> +	and	x3, x3, PAGE_MASK
> +	sub	x2, x2, x3
> +	ldr	x3, =TRAMPOLINE_VA
> +	add	x2, x2, x3
> +	br	x2				// no return
> +
> +2:	/* Default to HVC_CALL_HYP. */
>  	push	lr, xzr
>  
>  	/*


Thanks,

James

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug
@ 2016-01-26 17:42     ` James Morse
  0 siblings, 0 replies; 174+ messages in thread
From: James Morse @ 2016-01-26 17:42 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Mark Rutland, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, linux-arm-kernel, kexec, christoffer.dall

Hi!

On 15/01/16 19:18, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> The current kvm implementation on arm64 does cpu-specific initialization
> at system boot, and has no way to gracefully shutdown a core in terms of
> kvm. This prevents, especially, kexec from rebooting the system on a boot
> core in EL2.
> 
> This patch adds a cpu tear-down function and also puts an existing cpu-init
> code into a separate function, kvm_arch_hardware_disable() and
> kvm_arch_hardware_enable() respectively.
> We don't need arm64-specific cpu hotplug hook any more.
> 
> Since this patch modifies common part of code between arm and arm64, one
> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
> compiling errors.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index e06fd29..e91f80e 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c

>  #ifdef CONFIG_CPU_PM
>  static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
>  				    unsigned long cmd,
>  				    void *v)
>  {
> -	if (cmd == CPU_PM_EXIT &&
> -	    __hyp_get_vectors() == hyp_default_vectors) {
> -		cpu_init_hyp_mode(NULL);
> +	switch (cmd) {
> +	case CPU_PM_ENTER:
> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
> +			cpu_reset_hyp_mode();
> +
>  		return NOTIFY_OK;
> -	}
> +	case CPU_PM_EXIT:
> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
> +			cpu_init_hyp_mode();

I read this as:
		if (enabled)
			enable();

What am I missing? Is there a missing '!'?

[/me thinks some more]

I suspect this is trying to be clever: leaving the flag set over a
deep-sleep, to indicate that the hardware should be re-enabled when we
resume... if so, a comment to that effect would be good.


>  
> -	return NOTIFY_DONE;
> +		return NOTIFY_OK;
> +
> +	default:
> +		return NOTIFY_DONE;
> +	}
>  }
>  
>  static struct notifier_block hyp_init_cpu_pm_nb = {

> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 3070096..bca79f9 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -58,9 +58,18 @@
>  
>  #define HVC_CALL_FUNC 3
>  
> +/*
> + * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
> + *
> + * @x0: entry address in trampoline code in va
> + * @x1: identical mapping page table in pa
> + */
> +
>  #define BOOT_CPU_MODE_EL1	(0xe11)
>  #define BOOT_CPU_MODE_EL2	(0xe12)
>  
> +#define HVC_RESET_CPU 4
> +

Patch 5 added a fancy new way to call arbitrary functions at el2, why
not use that? (it would save beating up el1_sync again).

I agree the trampoline stuff is complicated - I will try and cook-up a
version of this patch for hibernate that does this. (... and comment
what I think is happening above while I'm at it)


>  #ifndef __ASSEMBLY__
>  
>  /*
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 1d7e502..d909ce2 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -140,6 +140,39 @@ merged:
>  	eret
>  ENDPROC(__kvm_hyp_init)
>  
> +	/*
> +	 * x0: HYP boot pgd
> +	 * x1: HYP phys_idmap_start
> +	 */
> +ENTRY(__kvm_hyp_reset)
> +	/* We're in trampoline code in VA, switch back to boot page tables */
> +	msr	ttbr0_el2, x0
> +	isb
> +
> +	/* Invalidate the old TLBs */
> +	tlbi	alle2
> +	dsb	sy
> +
> +	/* Branch into PA space */
> +	adr	x0, 1f
> +	bfi	x1, x0, #0, #PAGE_SHIFT
> +	br	x1
> +
> +	/* We're now in idmap, disable MMU */
> +1:	mrs	x0, sctlr_el2
> +	ldr	x1, =SCTLR_ELx_FLAGS
> +	bic	x0, x0, x1		// Clear SCTL_M and etc
> +	msr	sctlr_el2, x0
> +	isb
> +
> +	/* Install stub vectors */
> +	adrp	x0, __hyp_stub_vectors
> +	add	x0, x0, #:lo12:__hyp_stub_vectors

adr_l ?

> +	msr	vbar_el2, x0
> +
> +	eret
> +ENDPROC(__kvm_hyp_reset)
> +
>  	.ltorg
>  
>  	.popsection
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 15b1ef9..ed82dc2 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -986,10 +991,27 @@ el1_sync:					// Guest trapped into EL2
>  	cmp	x18, #HVC_GET_VECTORS
>  	b.ne	1f
>  	mrs	x0, vbar_el2
> -	b	2f
> -
> -1:	/* Default to HVC_CALL_HYP. */
> +	b	do_eret
>  
> +	/* jump into trampoline code */
> +1:	cmp	x18, #HVC_RESET_CPU
> +	b.ne	2f
> +	/*
> +	 * Entry point is:
> +	 *	TRAMPOLINE_VA
> +	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
> +	 */
> +	adrp	x2, __kvm_hyp_reset
> +	add	x2, x2, #:lo12:__kvm_hyp_reset
> +	adrp	x3, __hyp_idmap_text_start
> +	add	x3, x3, #:lo12:__hyp_idmap_text_start

adr_l ?

> +	and	x3, x3, PAGE_MASK
> +	sub	x2, x2, x3
> +	ldr	x3, =TRAMPOLINE_VA
> +	add	x2, x2, x3
> +	br	x2				// no return
> +
> +2:	/* Default to HVC_CALL_HYP. */
>  	push	lr, xzr
>  
>  	/*


Thanks,

James


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug
  2016-01-26 17:42     ` James Morse
@ 2016-01-27  7:37       ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-27  7:37 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/27/2016 02:42 AM, James Morse wrote:
> Hi!
>
> On 15/01/16 19:18, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> The current kvm implementation on arm64 does cpu-specific initialization
>> at system boot, and has no way to gracefully shutdown a core in terms of
>> kvm. This prevents, especially, kexec from rebooting the system on a boot
>> core in EL2.
>>
>> This patch adds a cpu tear-down function and also puts an existing cpu-init
>> code into a separate function, kvm_arch_hardware_disable() and
>> kvm_arch_hardware_enable() respectively.
>> We don't need arm64-specific cpu hotplug hook any more.
>>
>> Since this patch modifies common part of code between arm and arm64, one
>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>> compiling errors.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index e06fd29..e91f80e 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>
>>   #ifdef CONFIG_CPU_PM
>>   static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
>>   				    unsigned long cmd,
>>   				    void *v)
>>   {
>> -	if (cmd == CPU_PM_EXIT &&
>> -	    __hyp_get_vectors() == hyp_default_vectors) {
>> -		cpu_init_hyp_mode(NULL);
>> +	switch (cmd) {
>> +	case CPU_PM_ENTER:
>> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
>> +			cpu_reset_hyp_mode();
>> +
>>   		return NOTIFY_OK;
>> -	}
>> +	case CPU_PM_EXIT:
>> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
>> +			cpu_init_hyp_mode();
>
> I read this as:
> 		if (enabled)
> 			enable();
>
> What am I missing? Is there a missing '!'?
>
> [/me thinks some more]
>
> I suspect this is trying to be clever: leaving the flag set over a
> deep-sleep, to indicate that the hardware should be re-enabled when we
> resume... if so, a comment to that effect would be good.

Yep, I meant so. Will add some comment.

>
>>
>> -	return NOTIFY_DONE;
>> +		return NOTIFY_OK;
>> +
>> +	default:
>> +		return NOTIFY_DONE;
>> +	}
>>   }
>>
>>   static struct notifier_block hyp_init_cpu_pm_nb = {
>
>> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
>> index 3070096..bca79f9 100644
>> --- a/arch/arm64/include/asm/virt.h
>> +++ b/arch/arm64/include/asm/virt.h
>> @@ -58,9 +58,18 @@
>>
>>   #define HVC_CALL_FUNC 3
>>
>> +/*
>> + * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
>> + *
>> + * @x0: entry address in trampoline code in va
>> + * @x1: identical mapping page table in pa
>> + */
>> +
>>   #define BOOT_CPU_MODE_EL1	(0xe11)
>>   #define BOOT_CPU_MODE_EL2	(0xe12)
>>
>> +#define HVC_RESET_CPU 4
>> +
>
> Patch 5 added a fancy new way to call arbitrary functions at el2, why
> not use that? (it would save beating up el1_sync again).

Let me think. I need to detangle some header files.

> I agree the trampoline stuff is complicated - I will try and cook-up a
> version of this patch for hibernate that does this. (... and comment
> what I think is happening above while I'm at it)
>
>
>>   #ifndef __ASSEMBLY__
>>
>>   /*
>> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
>> index 1d7e502..d909ce2 100644
>> --- a/arch/arm64/kvm/hyp-init.S
>> +++ b/arch/arm64/kvm/hyp-init.S
>> @@ -140,6 +140,39 @@ merged:
>>   	eret
>>   ENDPROC(__kvm_hyp_init)
>>
>> +	/*
>> +	 * x0: HYP boot pgd
>> +	 * x1: HYP phys_idmap_start
>> +	 */
>> +ENTRY(__kvm_hyp_reset)
>> +	/* We're in trampoline code in VA, switch back to boot page tables */
>> +	msr	ttbr0_el2, x0
>> +	isb
>> +
>> +	/* Invalidate the old TLBs */
>> +	tlbi	alle2
>> +	dsb	sy
>> +
>> +	/* Branch into PA space */
>> +	adr	x0, 1f
>> +	bfi	x1, x0, #0, #PAGE_SHIFT
>> +	br	x1
>> +
>> +	/* We're now in idmap, disable MMU */
>> +1:	mrs	x0, sctlr_el2
>> +	ldr	x1, =SCTLR_ELx_FLAGS
>> +	bic	x0, x0, x1		// Clear SCTL_M and etc
>> +	msr	sctlr_el2, x0
>> +	isb
>> +
>> +	/* Install stub vectors */
>> +	adrp	x0, __hyp_stub_vectors
>> +	add	x0, x0, #:lo12:__hyp_stub_vectors
>
> adr_l ?

OK.

Thanks,
-Takahiro AKASHI

>> +	msr	vbar_el2, x0
>> +
>> +	eret
>> +ENDPROC(__kvm_hyp_reset)
>> +
>>   	.ltorg
>>
>>   	.popsection
>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>> index 15b1ef9..ed82dc2 100644
>> --- a/arch/arm64/kvm/hyp.S
>> +++ b/arch/arm64/kvm/hyp.S
>> @@ -986,10 +991,27 @@ el1_sync:					// Guest trapped into EL2
>>   	cmp	x18, #HVC_GET_VECTORS
>>   	b.ne	1f
>>   	mrs	x0, vbar_el2
>> -	b	2f
>> -
>> -1:	/* Default to HVC_CALL_HYP. */
>> +	b	do_eret
>>
>> +	/* jump into trampoline code */
>> +1:	cmp	x18, #HVC_RESET_CPU
>> +	b.ne	2f
>> +	/*
>> +	 * Entry point is:
>> +	 *	TRAMPOLINE_VA
>> +	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
>> +	 */
>> +	adrp	x2, __kvm_hyp_reset
>> +	add	x2, x2, #:lo12:__kvm_hyp_reset
>> +	adrp	x3, __hyp_idmap_text_start
>> +	add	x3, x3, #:lo12:__hyp_idmap_text_start
>
> adr_l ?
>
>> +	and	x3, x3, PAGE_MASK
>> +	sub	x2, x2, x3
>> +	ldr	x3, =TRAMPOLINE_VA
>> +	add	x2, x2, x3
>> +	br	x2				// no return
>> +
>> +2:	/* Default to HVC_CALL_HYP. */
>>   	push	lr, xzr
>>
>>   	/*
>
>
> Thanks,
>
> James
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug
@ 2016-01-27  7:37       ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-01-27  7:37 UTC (permalink / raw)
  To: James Morse
  Cc: Mark Rutland, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, linux-arm-kernel, kexec, christoffer.dall

On 01/27/2016 02:42 AM, James Morse wrote:
> Hi!
>
> On 15/01/16 19:18, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> The current kvm implementation on arm64 does cpu-specific initialization
>> at system boot, and has no way to gracefully shutdown a core in terms of
>> kvm. This prevents, especially, kexec from rebooting the system on a boot
>> core in EL2.
>>
>> This patch adds a cpu tear-down function and also puts an existing cpu-init
>> code into a separate function, kvm_arch_hardware_disable() and
>> kvm_arch_hardware_enable() respectively.
>> We don't need arm64-specific cpu hotplug hook any more.
>>
>> Since this patch modifies common part of code between arm and arm64, one
>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>> compiling errors.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index e06fd29..e91f80e 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>
>>   #ifdef CONFIG_CPU_PM
>>   static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
>>   				    unsigned long cmd,
>>   				    void *v)
>>   {
>> -	if (cmd == CPU_PM_EXIT &&
>> -	    __hyp_get_vectors() == hyp_default_vectors) {
>> -		cpu_init_hyp_mode(NULL);
>> +	switch (cmd) {
>> +	case CPU_PM_ENTER:
>> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
>> +			cpu_reset_hyp_mode();
>> +
>>   		return NOTIFY_OK;
>> -	}
>> +	case CPU_PM_EXIT:
>> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
>> +			cpu_init_hyp_mode();
>
> I read this as:
> 		if (enabled)
> 			enable();
>
> What am I missing? Is there a missing '!'?
>
> [/me thinks some more]
>
> I suspect this is trying to be clever: leaving the flag set over a
> deep-sleep, to indicate that the hardware should be re-enabled when we
> resume... if so, a comment to that effect would be good.

Yep, I meant so. Will add some comment.

>
>>
>> -	return NOTIFY_DONE;
>> +		return NOTIFY_OK;
>> +
>> +	default:
>> +		return NOTIFY_DONE;
>> +	}
>>   }
>>
>>   static struct notifier_block hyp_init_cpu_pm_nb = {
>
>> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
>> index 3070096..bca79f9 100644
>> --- a/arch/arm64/include/asm/virt.h
>> +++ b/arch/arm64/include/asm/virt.h
>> @@ -58,9 +58,18 @@
>>
>>   #define HVC_CALL_FUNC 3
>>
>> +/*
>> + * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
>> + *
>> + * @x0: entry address in trampoline code in va
>> + * @x1: identical mapping page table in pa
>> + */
>> +
>>   #define BOOT_CPU_MODE_EL1	(0xe11)
>>   #define BOOT_CPU_MODE_EL2	(0xe12)
>>
>> +#define HVC_RESET_CPU 4
>> +
>
> Patch 5 added a fancy new way to call arbitrary functions at el2, why
> not use that? (it would save beating up el1_sync again).

Let me think. I need to detangle some header files.

> I agree the trampoline stuff is complicated - I will try and cook-up a
> version of this patch for hibernate that does this. (... and comment
> what I think is happening above while I'm at it)
>
>
>>   #ifndef __ASSEMBLY__
>>
>>   /*
>> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
>> index 1d7e502..d909ce2 100644
>> --- a/arch/arm64/kvm/hyp-init.S
>> +++ b/arch/arm64/kvm/hyp-init.S
>> @@ -140,6 +140,39 @@ merged:
>>   	eret
>>   ENDPROC(__kvm_hyp_init)
>>
>> +	/*
>> +	 * x0: HYP boot pgd
>> +	 * x1: HYP phys_idmap_start
>> +	 */
>> +ENTRY(__kvm_hyp_reset)
>> +	/* We're in trampoline code in VA, switch back to boot page tables */
>> +	msr	ttbr0_el2, x0
>> +	isb
>> +
>> +	/* Invalidate the old TLBs */
>> +	tlbi	alle2
>> +	dsb	sy
>> +
>> +	/* Branch into PA space */
>> +	adr	x0, 1f
>> +	bfi	x1, x0, #0, #PAGE_SHIFT
>> +	br	x1
>> +
>> +	/* We're now in idmap, disable MMU */
>> +1:	mrs	x0, sctlr_el2
>> +	ldr	x1, =SCTLR_ELx_FLAGS
>> +	bic	x0, x0, x1		// Clear SCTL_M and etc
>> +	msr	sctlr_el2, x0
>> +	isb
>> +
>> +	/* Install stub vectors */
>> +	adrp	x0, __hyp_stub_vectors
>> +	add	x0, x0, #:lo12:__hyp_stub_vectors
>
> adr_l ?

OK.

Thanks,
-Takahiro AKASHI

>> +	msr	vbar_el2, x0
>> +
>> +	eret
>> +ENDPROC(__kvm_hyp_reset)
>> +
>>   	.ltorg
>>
>>   	.popsection
>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>> index 15b1ef9..ed82dc2 100644
>> --- a/arch/arm64/kvm/hyp.S
>> +++ b/arch/arm64/kvm/hyp.S
>> @@ -986,10 +991,27 @@ el1_sync:					// Guest trapped into EL2
>>   	cmp	x18, #HVC_GET_VECTORS
>>   	b.ne	1f
>>   	mrs	x0, vbar_el2
>> -	b	2f
>> -
>> -1:	/* Default to HVC_CALL_HYP. */
>> +	b	do_eret
>>
>> +	/* jump into trampoline code */
>> +1:	cmp	x18, #HVC_RESET_CPU
>> +	b.ne	2f
>> +	/*
>> +	 * Entry point is:
>> +	 *	TRAMPOLINE_VA
>> +	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
>> +	 */
>> +	adrp	x2, __kvm_hyp_reset
>> +	add	x2, x2, #:lo12:__kvm_hyp_reset
>> +	adrp	x3, __hyp_idmap_text_start
>> +	add	x3, x3, #:lo12:__hyp_idmap_text_start
>
> adr_l ?
>
>> +	and	x3, x3, PAGE_MASK
>> +	sub	x2, x2, x3
>> +	ldr	x3, =TRAMPOLINE_VA
>> +	add	x2, x2, x3
>> +	br	x2				// no return
>> +
>> +2:	/* Default to HVC_CALL_HYP. */
>>   	push	lr, xzr
>>
>>   	/*
>
>
> Thanks,
>
> James
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 18/19] arm64: kdump: update a kernel doc
  2016-01-22 11:13                                 ` Mark Rutland
@ 2016-02-02  5:18                                   ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-02-02  5:18 UTC (permalink / raw)
  To: linux-arm-kernel

Mark,

On 01/22/2016 08:13 PM, Mark Rutland wrote:
> On Fri, Jan 22, 2016 at 03:23:14PM +0900, AKASHI Takahiro wrote:
>> On 01/21/2016 09:02 PM, Mark Rutland wrote:
>>> On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
>>>> On 01/20/2016 08:49 PM, Mark Rutland wrote:
>>>>> On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
>>>>>> On 01/20/2016 11:49 AM, Dave Young wrote:
>>>>>>> Firmware does not know the kernel's endianness; the kernel should respect firmware
>>>>>>> maps and adapt to them. It sounds like a generic issue not specific to kexec.
>>>>>>
>>>>>> On arm64, a kernel image header has a bit field to specify the image's endianness.
>>>>>> Anyway, our current implementation relies on a user-supplied dtb to start a BE kernel.
>>>>>
>>>>> The firmware should _never_ care about the kernel's endianness. The
>>>>> bootloader or first kernel shouldn't care about the next kernel's
>>>>> endianness apart from in exceptional circumstances. The DTB for a LE
>>>>> kernel should look identical to that passed to a BE kernel.
>>>>
>>>> Please note that I didn't say anything different from your last two statements.
>>>> The current arm64 kexec implementation doesn't do anything specific to BE,
>>>> but as far as BE kernel doesn't support UEFI, users are responsible for
>>>> providing a proper dtb.
>>>
>>> I'm just confused as to what you mean by a "proper dtb" in that case.
>>>
>>> If you just mean one with memory nodes hacked in, then that would
>>> currently be a way to make that work, yes.
>>
>> One of useful cases that I have in my mind is kdump.
>> We may want to use a small sub-set of dtb, especially devices, to
>> make the reboot more reliable. Device drivers are likely to be vulnerable
>> at crash.
>
> I don't think that we can reliably have userspace carve out devices from
> the DTB or from ACPI tables in order to achieve that. That's going to
> end up complex and/or incomplete. We also can't do this in the
> kexec_load_file / Secure Boot case.
>
> That's not to say we cannot try, as it's possible when using kexec_load.
> However, it's only going to be possible on a subset of systems, and it
> would probably make sense to reserve this approach to those cases we
> cannot work around by other means (e.g. whitelisting "safe" devices in
> the kdump kernel, forcing explicit resets, etc).
>
>>> It seems like the better option is to fix the BE kernel to support a
>>> UEFI memory map, as that solves other issues.
>>
>> Why did Ard throw away his patch?
>
> In the absence of kexec it wasn't necessary, it only supported a subset
> of the runtime services (and no other features like DMI IIRC), and it
> looked like it would be painful to debug (if something went wrong while
> a CPU was in LE mode, we couldn't even panic()).
>
> Given BE kernels on UEFI were never supported until that point, there
> wasn't a compelling reason to support that case.
>
> Even if we support the UEFI memory map, I don't think it's worth the
> effort to support runtime services, ACPI, and related code that's only
> ever been tested on LE. So realistically this would only work on systems
> using UEFI && DT rather than UEFI && ACPI.
>
>> So, are you now suggesting that we put both "elfcorehdr=" and
>> "usable-memory=" under /chosen in dtb?
>
> Yes.
>
>> That's fair enough.  (as far as nobody cares about incompatibility
>> with other archs.)
>
> Glad to hear! :)

I'm preparing for a new version based on our discussions.
Do you think that UEFI memory map support on BE kernel is a prerequisite
for accepting my kdump?

-Takahiro AKASHI

> Thanks,
> Mark.
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 18/19] arm64: kdump: update a kernel doc
@ 2016-02-02  5:18                                   ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-02-02  5:18 UTC (permalink / raw)
  To: Mark Rutland
  Cc: ard.biesheuvel, Geoff Levand, Catalin Marinas, Will Deacon,
	marc.zyngier, James Morse, christoffer.dall, Dave Young, kexec,
	linux-arm-kernel

Mark,

On 01/22/2016 08:13 PM, Mark Rutland wrote:
> On Fri, Jan 22, 2016 at 03:23:14PM +0900, AKASHI Takahiro wrote:
>> On 01/21/2016 09:02 PM, Mark Rutland wrote:
>>> On Thu, Jan 21, 2016 at 03:53:42PM +0900, AKASHI Takahiro wrote:
>>>> On 01/20/2016 08:49 PM, Mark Rutland wrote:
>>>>> On Wed, Jan 20, 2016 at 03:07:53PM +0900, AKASHI Takahiro wrote:
>>>>>> On 01/20/2016 11:49 AM, Dave Young wrote:
>>>>>>> Firmware does not know the kernel's endianness; the kernel should respect firmware
>>>>>>> maps and adapt to them. It sounds like a generic issue not specific to kexec.
>>>>>>
>>>>>> On arm64, a kernel image header has a bit field to specify the image's endianness.
>>>>>> Anyway, our current implementation relies on a user-supplied dtb to start a BE kernel.
>>>>>
>>>>> The firmware should _never_ care about the kernel's endianness. The
>>>>> bootloader or first kernel shouldn't care about the next kernel's
>>>>> endianness apart from in exceptional circumstances. The DTB for a LE
>>>>> kernel should look identical to that passed to a BE kernel.
>>>>
>>>> Please note that I didn't say anything different from your last two statements.
>>>> The current arm64 kexec implementation doesn't do anything specific to BE,
>>>> but as far as BE kernel doesn't support UEFI, users are responsible for
>>>> providing a proper dtb.
>>>
>>> I'm just confused as to what you mean by a "proper dtb" in that case.
>>>
>>> If you just mean one with memory nodes hacked in, then that would
>>> currently be a way to make that work, yes.
>>
>> One of useful cases that I have in my mind is kdump.
>> We may want to use a small sub-set of dtb, especially devices, to
>> make the reboot more reliable. Device drivers are likely to be vulnerable
>> at crash.
>
> I don't think that we can reliably have userspace carve out devices from
> the DTB or from ACPI tables in order to achieve that. That's going to
> end up complex and/or incomplete. We also can't do this in the
> kexec_load_file / Secure Boot case.
>
> That's not to say we cannot try, as it's possible when using kexec_load.
> However, it's only going to be possible on a subset of systems, and it
> would probably make sense to reserve this approach to those cases we
> cannot work around by other means (e.g. whitelisting "safe" devices in
> the kdump kernel, forcing explicit resets, etc).
>
>>> It seems like the better option is to fix the BE kernel to support a
>>> UEFI memory map, as that solves other issues.
>>
>> Why did Ard throw away his patch?
>
> In the absence of kexec it wasn't necessary, it only supported a subset
> of the runtime services (and no other features like DMI IIRC), and it
> looked like it would be painful to debug (if something went wrong while
> a CPU was in LE mode, we couldn't even panic()).
>
> Given BE kernels on UEFI were never supported until that point, there
> wasn't a compelling reason to support that case.
>
> Even if we support the UEFI memory map, I don't think it's worth the
> effort to support runtime services, ACPI, and related code that's only
> ever been tested on LE. So realistically this would only work on systems
> using UEFI && DT rather than UEFI && ACPI.
>
>> So, are you now suggesting that we put both "elfcorehdr=" and
>> "usable-memory=" under /chosen in dtb?
>
> Yes.
>
>> That's fair enough.  (as far as nobody cares about incompatibility
>> with other archs.)
>
> Glad to hear! :)

I'm preparing for a new version based on our discussions.
Do you think that UEFI memory map support on BE kernel is a prerequisite
for accepting my kdump?

-Takahiro AKASHI

> Thanks,
> Mark.
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
       [not found] ` <c7575f853ccc491bb0212e025aab1cc9@NASANEXM01H.na.qualcomm.com>
@ 2016-03-01 17:54     ` Azriel Samson
  0 siblings, 0 replies; 174+ messages in thread
From: Azriel Samson @ 2016-03-01 17:54 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On 01/15/16 at 07:18pm, Geoff Levand wrote:
> This series adds the core support for kexec re-boot and kdump on
> ARM64. This version of the series combines Takahiro's kdump patches
> with my kexec patches. Please consider all patches for inclusion.
>
> I just tested all the endian combinations of kexec LE->LE, LE->BE,
> BE->BE, BE->LE, and both LE and BE kdump, and all work as expected.
>
> To load a second stage kernel and execute a kexec re-boot or to work
> with kdump on ARM64 systems a series of patches to kexec-tools [2],
> which have not yet been merged upstream, are needed.
>
> To examine vmcore (/proc/vmcore), you should use - gdb v7.7 or later
> - crash v7.1.1 or later
>
> [1]
> https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
> [2]
> https://git.kernel.org/cgit/linux/kernel/git/geoff/kexec-tools.git
>

We have been using the kexec/kdump patches on ARM64 and would like to 
use them from upstream.

Assuming these are not going to be in Linux-4.5, a rebased set on 
Linux-4.5 would be very useful.

Thank you for your work.
-- 
Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-01 17:54     ` Azriel Samson
  0 siblings, 0 replies; 174+ messages in thread
From: Azriel Samson @ 2016-03-01 17:54 UTC (permalink / raw)
  To: Geoff Levand
  Cc: 'Mark Rutland',
	marc.zyngier, catalin.marinas, 'Will Deacon',
	'AKASHI Takahiro',
	james.morse, christoffer.dall, virajm, kexec, linux-arm-kernel

Hi Geoff,

On 01/15/16 at 07:18pm, Geoff Levand wrote:
> This series adds the core support for kexec re-boot and kdump on
> ARM64. This version of the series combines Takahiro's kdump patches
> with my kexec patches. Please consider all patches for inclusion.
>
> I just tested all the endian combinations of kexec LE->LE, LE->BE,
> BE->BE, BE->LE, and both LE and BE kdump, and all work as expected.
>
> To load a second stage kernel and execute a kexec re-boot or to work
> with kdump on ARM64 systems a series of patches to kexec-tools [2],
> which have not yet been merged upstream, are needed.
>
> To examine vmcore (/proc/vmcore), you should use - gdb v7.7 or later
> - crash v7.1.1 or later
>
> [1]
> https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
> [2]
> https://git.kernel.org/cgit/linux/kernel/git/geoff/kexec-tools.git
>

We have been using the kexec/kdump patches on ARM64 and would like to 
use them from upstream.

Assuming these are not going to be in Linux-4.5, a rebased set on 
Linux-4.5 would be very useful.

Thank you for your work.
-- 
Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-03-01 17:54     ` Azriel Samson
@ 2016-03-02  1:17       ` Geoff Levand
  -1 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-03-02  1:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:

> We have been using the kexec/kdump patches on ARM64 and would like to
> use them from upstream.
> 
> Assuming these are not going to be in Linux-4.5, a rebased set on 
> Linux-4.5 would be very useful.

I'm waiting for one of two things to happen, either Marc Zyngier gives
an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
maintainers decide they will accept my proposal to merge kexec with a
conditional in the Kconfig that only allows one of KVM or kexec to be
selected.

The kexec patches have been in review for 20 months now, with only
minor changes in the past 15.  I don't expect much would be gained in
posting the kexec series again unless there is a chance they would be
merged.

-Geoff

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-02  1:17       ` Geoff Levand
  0 siblings, 0 replies; 174+ messages in thread
From: Geoff Levand @ 2016-03-02  1:17 UTC (permalink / raw)
  To: Azriel Samson, catalin.marinas, 'Will Deacon', marc.zyngier
  Cc: 'Mark Rutland', kexec, 'AKASHI Takahiro',
	james.morse, linux-arm-kernel, virajm, christoffer.dall

Hi,

On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:

> We have been using the kexec/kdump patches on ARM64 and would like to
> use them from upstream.
> 
> Assuming these are not going to be in Linux-4.5, a rebased set on 
> Linux-4.5 would be very useful.

I'm waiting for one of two things to happen, either Marc Zyngier gives
an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
maintainers decide they will accept my proposal to merge kexec with a
conditional in the Kconfig that only allows one of KVM or kexec to be
selected.

The kexec patches have been in review for 20 months now, with only
minor changes in the past 15.  I don't expect much would be gained in
posting the kexec series again unless there is a chance they would be
merged.

-Geoff


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-03-02  1:17       ` Geoff Levand
@ 2016-03-02  1:38         ` Will Deacon
  -1 siblings, 0 replies; 174+ messages in thread
From: Will Deacon @ 2016-03-02  1:38 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Mar 01, 2016 at 05:17:55PM -0800, Geoff Levand wrote:
> On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:
> 
> > We have been using the kexec/kdump patches on ARM64 and would like to
> > use them from upstream.
> > 
> > Assuming these are not going to be in Linux-4.5, a rebased set on 
> > Linux-4.5 would be very useful.
> 
> I'm waiting for one of two things to happen, either Marc Zyngier gives
> an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
> maintainers decide they will accept my proposal to merge kexec with a
> conditional in the Kconfig that only allows one of KVM or kexec to be
> selected.

Depending on !KVM isn't even an option as far as I'm concerned. It basically
makes this all dead code.

As for awaiting an ack on the kvm hotplug patch, afaict, Akashi was going
to address James's comments on the last posting [1].

> The kexec patches have been in review for 20 months now, with only
> minor changes in the past 15.  I don't expect much would be gained in
> posting the kexec series again unless there is a chance they would be
> merged.

Why do you say that? v13 attracted lots of comments from a variety of
reviewers, so the onus is on you to address that feedback and post a
new version. Alternatively, declare that you no longer intend to work
on the series, and perhaps then somebody else can pick it up and finish
the job.

Will

[1] http://lkml.kernel.org/r/56A873C0.6090406@linaro.org

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-02  1:38         ` Will Deacon
  0 siblings, 0 replies; 174+ messages in thread
From: Will Deacon @ 2016-03-02  1:38 UTC (permalink / raw)
  To: Geoff Levand
  Cc: 'Mark Rutland',
	marc.zyngier, catalin.marinas, Azriel Samson, kexec,
	'AKASHI Takahiro',
	james.morse, christoffer.dall, virajm, linux-arm-kernel

On Tue, Mar 01, 2016 at 05:17:55PM -0800, Geoff Levand wrote:
> On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:
> 
> > We have been using the kexec/kdump patches on ARM64 and would like to
> > use them from upstream.
> > 
> > Assuming these are not going to be in Linux-4.5, a rebased set on 
> > Linux-4.5 would be very useful.
> 
> I'm waiting for one of two things to happen, either Marc Zyngier gives
> an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
> maintainers decide they will accept my proposal to merge kexec with a
> conditional in the Kconfig that only allows one of KVM or kexec to be
> selected.

Depending on !KVM isn't even an option as far as I'm concerned. It basically
makes this all dead code.

As for awaiting an ack on the kvm hotplug patch, afaict, Akashi was going
to address James's comments on the last posting [1].

> The kexec patches have been in review for 20 months now, with only
> minor changes in the past 15.  I don't expect much would be gained in
> posting the kexec series again unless there is a chance they would be
> merged.

Why do you say that? v13 attracted lots of comments from a variety of
reviewers, so the onus is on you to address that feedback and post a
new version. Alternatively, declare that you no longer intend to work
on the series, and perhaps then somebody else can pick it up and finish
the job.

Will

[1] http://lkml.kernel.org/r/56A873C0.6090406@linaro.org

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-03-02  1:38         ` Will Deacon
@ 2016-03-02  2:28           ` AKASHI Takahiro
  -1 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-03-02  2:28 UTC (permalink / raw)
  To: linux-arm-kernel

On 03/02/2016 10:38 AM, Will Deacon wrote:
> On Tue, Mar 01, 2016 at 05:17:55PM -0800, Geoff Levand wrote:
>> On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:
>>
>>> We have been using the kexec/kdump patches on ARM64 and would like to
>>> use them from upstream.
>>>
>>> Assuming these are not going to be in Linux-4.5, a rebased set on
>>> Linux-4.5 would be very useful.
>>
>> I'm waiting for one of two things to happen, either Marc Zyngier gives
>> an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
>> maintainers decide they will accept my proposal to merge kexec with a
>> conditional in the Kconfig that only allows one of KVM or kexec to be
>> selected.
>
> Depending on !KVM isn't even an option as far as I'm concerned. It basically
> makes this all dead code.
>
> As for awaiting an ack on the kvm hotplug patch, afaict, Akashi was going
> to address James's comments on the last posting [1].

Come on!
I have already fixed this issue, but I am still waiting for Geoff to rebase his
patches, as he is now the submitter of the whole patch series including kexec/kdump.
(Please note, as James suggested, some (minor) modifications need to be made for
rebasing onto v4.5.)

>> The kexec patches have been in review for 20 months now, with only
>> minor changes in the past 15.  I don't expect much would be gained in
>> posting the kexec series again unless there is a chance they would be
>> merged.
>
> Why do you say that? v13 attracted lots of comments from a variety of
> reviewers, so the onus is on you to address that feedback and post a
> new version. Alternatively, declare that you no longer intend to work
> on the series, and perhaps then somebody else can pick it up and finish
> the job.

*If* Geoff is no longer willing to work on upstreaming kexec stuff,
I will probably take over his task.

-Takahiro AKASHI

> Will
>
> [1] http://lkml.kernel.org/r/56A873C0.6090406@linaro.org
>

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-02  2:28           ` AKASHI Takahiro
  0 siblings, 0 replies; 174+ messages in thread
From: AKASHI Takahiro @ 2016-03-02  2:28 UTC (permalink / raw)
  To: Will Deacon, Geoff Levand
  Cc: 'Mark Rutland',
	marc.zyngier, catalin.marinas, Azriel Samson, kexec, james.morse,
	christoffer.dall, virajm, linux-arm-kernel

On 03/02/2016 10:38 AM, Will Deacon wrote:
> On Tue, Mar 01, 2016 at 05:17:55PM -0800, Geoff Levand wrote:
>> On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:
>>
>>> We have been using the kexec/kdump patches on ARM64 and would like to
>>> use them from upstream.
>>>
>>> Assuming these are not going to be in Linux-4.5, a rebased set on
>>> Linux-4.5 would be very useful.
>>
>> I'm waiting for one of two things to happen, either Marc Zyngier gives
>> an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
>> maintainers decide they will accept my proposal to merge kexec with a
>> conditional in the Kconfig that only allows one of KVM or kexec to be
>> selected.
>
> Depending on !KVM isn't even an option as far as I'm concerned. It basically
> makes this all dead code.
>
> As for awaiting an ack on the kvm hotplug patch, afaict, Akashi was going
> to address James's comments on the last posting [1].

Come on!
I have already fixed this issue, but I am still waiting for Geoff to rebase his
patch, as he is now the submitter of the whole patch series including kexec/kdump.
(Please note, as James suggested, some (minor) modifications need to be made for
rebasing onto v4.5)

>> The kexec patches have been in review for 20 months now, with only
>> minor changes in the past 15.  I don't expect much would be gained in
>> posting the kexec series again unless there is a chance they would be
>> merged.
>
> Why do you say that? v13 attracted lots of comments from a variety of
> reviewers, so the onus is on you to address that feedback and post a
> new version. Alternatively, declare that you no longer intend to work
> on the series, and perhaps then somebody else can pick it up and finish
> the job.

*If* Geoff is no longer willing to work on upstreaming kexec stuff,
I will probably take over his task.

-Takahiro AKASHI

> Will
>
> [1] http://lkml.kernel.org/r/56A873C0.6090406@linaro.org
>

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-03-02  1:17       ` Geoff Levand
@ 2016-03-02  8:07         ` Marc Zyngier
  -1 siblings, 0 replies; 174+ messages in thread
From: Marc Zyngier @ 2016-03-02  8:07 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 1 Mar 2016 17:17:55 -0800
Geoff Levand <geoff@infradead.org> wrote:

Geoff,

> Hi,
> 
> On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:
> 
> > We have been using the kexec/kdump patches on ARM64 and would like to
> > use them from upstream.
> > 
> > Assuming these are not going to be in Linux-4.5, a rebased set on 
> > Linux-4.5 would be very useful.
> 
> I'm waiting for one of two things to happen, either Marc Zyngier gives
> an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
> maintainers decide they will accept my proposal to merge kexec with a
> conditional in the Kconfig that only allows one of KVM or kexec to be
> selected.

Do you expect me to give my Ack on a series that I cannot even apply
(let alone test) with mainline?

> The kexec patches have been in review for 20 months now, with only
> minor changes in the past 15.

Given the amount of comment each posting of this series has attracted, I
think we have a different appreciation of what a minor change is.

> I don't expect much would be gained in posting the kexec series again
> unless there is a chance they would be merged.

I think that's perfectly fine. Akashi Takahiro and James Morse have
been working together for a while now, reusing part of the kexec series
for the hibernate support (and not breaking KVM, oddly enough). My vote
is that they both take over the maintenance of this series on top of
the hibernate support, and finish the job.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny.

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-02  8:07         ` Marc Zyngier
  0 siblings, 0 replies; 174+ messages in thread
From: Marc Zyngier @ 2016-03-02  8:07 UTC (permalink / raw)
  To: Geoff Levand
  Cc: 'Mark Rutland',
	catalin.marinas, Azriel Samson, 'Will Deacon',
	'AKASHI Takahiro',
	james.morse, christoffer.dall, virajm, kexec, linux-arm-kernel

On Tue, 1 Mar 2016 17:17:55 -0800
Geoff Levand <geoff@infradead.org> wrote:

Geoff,

> Hi,
> 
> On Tue, 2016-03-01 at 10:54 -0700, Azriel Samson wrote:
> 
> > We have been using the kexec/kdump patches on ARM64 and would like to
> > use them from upstream.
> > 
> > Assuming these are not going to be in Linux-4.5, a rebased set on 
> > Linux-4.5 would be very useful.
> 
> I'm waiting for one of two things to happen, either Marc Zyngier gives
> an ack for the patch 'arm64: kvm: allows kvm cpu hotplug', or the arm64
> maintainers decide they will accept my proposal to merge kexec with a
> conditional in the Kconfig that only allows one of KVM or kexec to be
> selected.

Do you expect me to give my Ack on a series that I cannot even apply
(let alone test) with mainline?

> The kexec patches have been in review for 20 months now, with only
> minor changes in the past 15.

Given the amount of comment each posting of this series has attracted, I
think we have a different appreciation of what a minor change is.

> I don't expect much would be gained in posting the kexec series again
> unless there is a chance they would be merged.

I think that's perfectly fine. Akashi Takahiro and James Morse have
been working together for a while now, reusing part of the kexec series
for the hibernate support (and not breaking KVM, oddly enough). My vote
is that they both take over the maintenance of this series on top of
the hibernate support, and finish the job.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-03-01 17:54     ` Azriel Samson
@ 2016-03-02 12:33       ` Pratyush Anand
  -1 siblings, 0 replies; 174+ messages in thread
From: Pratyush Anand @ 2016-03-02 12:33 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/03/2016:10:54:43 AM, Azriel Samson wrote:
> We have been using the kexec/kdump patches on ARM64 and would like to use
> them from upstream.
> 
> Assuming these are not going to be in Linux-4.5, a rebased set on Linux-4.5
> would be very useful.

If you just want a rebased set, then you may take it from my branch until the
next version is released.

https://github.com/pratyushanand/linux.git : kexec/upstream_arm64_devel

~Pratyush

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-02 12:33       ` Pratyush Anand
  0 siblings, 0 replies; 174+ messages in thread
From: Pratyush Anand @ 2016-03-02 12:33 UTC (permalink / raw)
  To: Azriel Samson
  Cc: 'Mark Rutland',
	Geoff Levand, catalin.marinas, 'Will Deacon',
	'AKASHI Takahiro',
	james.morse, linux-arm-kernel, marc.zyngier, virajm, kexec,
	christoffer.dall

On 01/03/2016:10:54:43 AM, Azriel Samson wrote:
> We have been using the kexec/kdump patches on ARM64 and would like to use
> them from upstream.
> 
> Assuming these are not going to be in Linux-4.5, a rebased set on Linux-4.5
> would be very useful.

If you just want a rebased set, then you may take it from my branch until the
next version is released.

https://github.com/pratyushanand/linux.git : kexec/upstream_arm64_devel

~Pratyush

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

* [PATCH 00/19] arm64 kexec kernel patches v13
  2016-03-02 12:33       ` Pratyush Anand
@ 2016-03-02 16:51         ` Azriel Samson
  -1 siblings, 0 replies; 174+ messages in thread
From: Azriel Samson @ 2016-03-02 16:51 UTC (permalink / raw)
  To: linux-arm-kernel



On 3/2/2016 5:33 AM, Pratyush Anand wrote:

> If you just want a rebased set, then you may take it from my branch until the
> next version is released.
>
> https://github.com/pratyushanand/linux.git : kexec/upstream_arm64_devel
>

Thanks Pratyush,
This should help for now.
-- 
Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 174+ messages in thread

* Re: [PATCH 00/19] arm64 kexec kernel patches v13
@ 2016-03-02 16:51         ` Azriel Samson
  0 siblings, 0 replies; 174+ messages in thread
From: Azriel Samson @ 2016-03-02 16:51 UTC (permalink / raw)
  To: Pratyush Anand
  Cc: 'Mark Rutland',
	Geoff Levand, catalin.marinas, 'Will Deacon',
	'AKASHI Takahiro',
	james.morse, christoffer.dall, marc.zyngier, virajm, kexec,
	linux-arm-kernel



On 3/2/2016 5:33 AM, Pratyush Anand wrote:

> If you just want a rebased set, then you may take it from my branch until the
> next version is released.
>
> https://github.com/pratyushanand/linux.git : kexec/upstream_arm64_devel
>

Thanks Pratyush,
This should help for now.
-- 
Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 174+ messages in thread

end of thread, other threads:[~2016-03-02 16:51 UTC | newest]

Thread overview: 174+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-15 19:18 [PATCH 00/19] arm64 kexec kernel patches v13 Geoff Levand
2016-01-15 19:18 ` Geoff Levand
2016-01-15 19:18 ` [PATCH 07/19] arm64: Add back cpu_reset routines Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 03/19] arm64: Add new asm macro copy_page Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-20 14:01   ` James Morse
2016-01-20 14:01     ` James Morse
2016-01-15 19:18 ` [PATCH 05/19] arm64: Convert hcalls to use HVC immediate value Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 02/19] arm64: kernel: Include _AC definition in page.h Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-18 10:05   ` Mark Rutland
2016-01-18 10:05     ` Mark Rutland
2016-01-15 19:18 ` [PATCH 09/19] Revert "arm64: remove dead code" Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:55   ` Mark Rutland
2016-01-15 19:55     ` Mark Rutland
2016-01-20 21:18     ` Geoff Levand
2016-01-20 21:18       ` Geoff Levand
2016-01-15 19:18 ` [PATCH 04/19] arm64: Cleanup SCTLR flags Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 20:07   ` Mark Rutland
2016-01-15 20:07     ` Mark Rutland
2016-01-18 10:12     ` Marc Zyngier
2016-01-18 10:12       ` Marc Zyngier
2016-01-19 11:59       ` Dave Martin
2016-01-19 11:59         ` Dave Martin
2016-01-25 15:09   ` James Morse
2016-01-25 15:09     ` James Morse
2016-01-15 19:18 ` [PATCH 08/19] Revert "arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function" Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 01/19] arm64: Fold proc-macros.S into assembler.h Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 06/19] arm64: Add new hcall HVC_CALL_FUNC Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 17/19] arm64: kdump: enable kdump in the arm64 defconfig Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 15/19] arm64: kdump: implement machine_crash_shutdown() Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 19/19] arm64: kdump: relax BUG_ON() if more than one cpus are still active Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 12/19] arm64/kexec: Enable kexec in the arm64 defconfig Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 18/19] arm64: kdump: update a kernel doc Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 20:16   ` Mark Rutland
2016-01-15 20:16     ` Mark Rutland
2016-01-18 10:26     ` AKASHI Takahiro
2016-01-18 10:26       ` AKASHI Takahiro
2016-01-18 11:29       ` Mark Rutland
2016-01-18 11:29         ` Mark Rutland
2016-01-19  5:31         ` AKASHI Takahiro
2016-01-19  5:31           ` AKASHI Takahiro
2016-01-19 12:10           ` Mark Rutland
2016-01-19 12:10             ` Mark Rutland
2016-01-20  4:34             ` AKASHI Takahiro
2016-01-20  4:34               ` AKASHI Takahiro
2016-01-19  1:43       ` Dave Young
2016-01-19  1:43         ` Dave Young
2016-01-19  1:50         ` Dave Young
2016-01-19  1:50           ` Dave Young
2016-01-19  5:35         ` AKASHI Takahiro
2016-01-19  5:35           ` AKASHI Takahiro
2016-01-19 12:28           ` Dave Young
2016-01-19 12:28             ` Dave Young
2016-01-19 12:51             ` Mark Rutland
2016-01-19 12:51               ` Mark Rutland
2016-01-19 13:45               ` Dave Young
2016-01-19 13:45                 ` Dave Young
2016-01-19 14:01                 ` Mark Rutland
2016-01-19 14:01                   ` Mark Rutland
2016-01-20  2:49                   ` Dave Young
2016-01-20  2:49                     ` Dave Young
2016-01-20  6:07                     ` AKASHI Takahiro
2016-01-20  6:07                       ` AKASHI Takahiro
2016-01-20  6:38                       ` Dave Young
2016-01-20  6:38                         ` Dave Young
2016-01-20  7:00                         ` Dave Young
2016-01-20  7:00                           ` Dave Young
2016-01-20  8:01                           ` AKASHI Takahiro
2016-01-20  8:01                             ` AKASHI Takahiro
2016-01-20  8:26                             ` Dave Young
2016-01-20  8:26                               ` Dave Young
2016-01-20 11:54                         ` Mark Rutland
2016-01-20 11:54                           ` Mark Rutland
2016-01-21  2:57                           ` Dave Young
2016-01-21  2:57                             ` Dave Young
2016-01-21  3:03                           ` Dave Young
2016-01-21  3:03                             ` Dave Young
2016-01-20 11:49                       ` Mark Rutland
2016-01-20 11:49                         ` Mark Rutland
2016-01-21  6:53                         ` AKASHI Takahiro
2016-01-21  6:53                           ` AKASHI Takahiro
2016-01-21 12:02                           ` Mark Rutland
2016-01-21 12:02                             ` Mark Rutland
2016-01-22  6:23                             ` AKASHI Takahiro
2016-01-22  6:23                               ` AKASHI Takahiro
2016-01-22 11:13                               ` Mark Rutland
2016-01-22 11:13                                 ` Mark Rutland
2016-02-02  5:18                                 ` AKASHI Takahiro
2016-02-02  5:18                                   ` AKASHI Takahiro
2016-01-25  3:19                               ` Dave Young
2016-01-25  3:19                                 ` Dave Young
2016-01-25  4:23                                 ` Dave Young
2016-01-25  4:23                                   ` Dave Young
2016-01-20 11:28                     ` Mark Rutland
2016-01-20 11:28                       ` Mark Rutland
2016-01-21  2:54                       ` Dave Young
2016-01-21  2:54                         ` Dave Young
2016-01-20  5:25                   ` AKASHI Takahiro
2016-01-20  5:25                     ` AKASHI Takahiro
2016-01-20 12:02                     ` Mark Rutland
2016-01-20 12:02                       ` Mark Rutland
2016-01-20 12:36                       ` Mark Rutland
2016-01-20 12:36                         ` Mark Rutland
2016-01-20 14:59                         ` Ard Biesheuvel
2016-01-20 14:59                           ` Ard Biesheuvel
2016-01-20 15:04                           ` Mark Rutland
2016-01-20 15:04                             ` Mark Rutland
2016-01-21  5:43                           ` AKASHI Takahiro
2016-01-21  5:43                             ` AKASHI Takahiro
2016-01-21 13:02                             ` Mark Rutland
2016-01-21 13:02                               ` Mark Rutland
2016-01-19 12:17         ` Mark Rutland
2016-01-19 12:17           ` Mark Rutland
2016-01-19 13:52           ` Dave Young
2016-01-19 13:52             ` Dave Young
2016-01-19 14:05             ` Mark Rutland
2016-01-19 14:05               ` Mark Rutland
2016-01-20  2:54               ` Dave Young
2016-01-20  2:54                 ` Dave Young
2016-01-15 19:18 ` [PATCH 10/19] arm64: kvm: allows kvm cpu hotplug Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-26 17:42   ` James Morse
2016-01-26 17:42     ` James Morse
2016-01-27  7:37     ` AKASHI Takahiro
2016-01-27  7:37       ` AKASHI Takahiro
2016-01-15 19:18 ` [PATCH 14/19] arm64: kdump: reserve memory for crash dump kernel Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 11/19] arm64/kexec: Add core kexec support Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 13/19] arm64/kexec: Add pr_debug output Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-15 19:18 ` [PATCH 16/19] arm64: kdump: add kdump support Geoff Levand
2016-01-15 19:18   ` Geoff Levand
2016-01-21 14:17   ` James Morse
2016-01-21 14:17     ` James Morse
2016-01-22  4:50     ` AKASHI Takahiro
2016-01-22  4:50       ` AKASHI Takahiro
2016-01-19 12:32 ` [PATCH 00/19] arm64 kexec kernel patches v13 Dave Young
2016-01-19 12:32   ` Dave Young
2016-01-20  0:15   ` Geoff Levand
2016-01-20  0:15     ` Geoff Levand
2016-01-20  2:56     ` Dave Young
2016-01-20  2:56       ` Dave Young
2016-01-20 21:15       ` Geoff Levand
2016-01-20 21:15         ` Geoff Levand
2016-01-21 12:11       ` Mark Rutland
2016-01-21 12:11         ` Mark Rutland
     [not found] ` <c7575f853ccc491bb0212e025aab1cc9@NASANEXM01H.na.qualcomm.com>
2016-03-01 17:54   ` Azriel Samson
2016-03-01 17:54     ` Azriel Samson
2016-03-02  1:17     ` Geoff Levand
2016-03-02  1:17       ` Geoff Levand
2016-03-02  1:38       ` Will Deacon
2016-03-02  1:38         ` Will Deacon
2016-03-02  2:28         ` AKASHI Takahiro
2016-03-02  2:28           ` AKASHI Takahiro
2016-03-02  8:07       ` Marc Zyngier
2016-03-02  8:07         ` Marc Zyngier
2016-03-02 12:33     ` Pratyush Anand
2016-03-02 12:33       ` Pratyush Anand
2016-03-02 16:51       ` Azriel Samson
2016-03-02 16:51         ` Azriel Samson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.