loongarch.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] LoongArch: Add kexec/kdump support
@ 2022-08-29  4:37 Youling Tang
  2022-08-29  4:37 ` [PATCH 1/3] LoongArch: Add kexec support Youling Tang
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Youling Tang @ 2022-08-29  4:37 UTC (permalink / raw)
  To: Huacai Chen, Baoquan He, Eric Biederman
  Cc: WANG Xuerui, Vivek Goyal, Dave Young, Guo Ren, Jiaxun Yang,
	kexec, loongarch, linux-kernel

This patch series adds kexec/kdump support for LoongArch (64-bit only).

Kexec is a system call that enables you to load and boot into another kernel
from the currently running kernel. This is useful for kernel developers or
other people who need to reboot very quickly without waiting for the whole
BIOS boot process to finish. 

Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
dump of the system kernel's memory needs to be taken (for example, when
the system panics). The system kernel's memory image is preserved across
the reboot and is accessible to the dump-capture kernel.

For details, see Documentation/admin-guide/kdump/kdump.rst.

For the user-space tools (kexec-tools), see link [1].

TODO:
Currently kdump cannot use the same binary image for both kernels: the
production kernel and the capture kernel have to be built with different
configurations. I plan to add kernel relocation support in the near future,
and then implement same-binary support on top of it.

[1] Link: https://github.com/tangyouling/kexec-tools


Youling Tang (3):
  LoongArch: Add kexec support
  LoongArch: Add kdump support
  LoongArch: Enable CONFIG_KEXEC

 arch/loongarch/Kconfig                     |  33 ++++
 arch/loongarch/Makefile                    |   4 +
 arch/loongarch/configs/loongson3_defconfig |   1 +
 arch/loongarch/include/asm/kexec.h         |  58 +++++++
 arch/loongarch/kernel/Makefile             |   3 +
 arch/loongarch/kernel/crash.c              | 100 ++++++++++++
 arch/loongarch/kernel/crash_dump.c         |  19 +++
 arch/loongarch/kernel/head.S               |   7 +-
 arch/loongarch/kernel/machine_kexec.c      | 176 +++++++++++++++++++++
 arch/loongarch/kernel/mem.c                |   6 +
 arch/loongarch/kernel/relocate_kernel.S    | 131 +++++++++++++++
 arch/loongarch/kernel/setup.c              |  49 ++++++
 arch/loongarch/kernel/traps.c              |   4 +
 13 files changed, 590 insertions(+), 1 deletion(-)
 create mode 100644 arch/loongarch/include/asm/kexec.h
 create mode 100644 arch/loongarch/kernel/crash.c
 create mode 100644 arch/loongarch/kernel/crash_dump.c
 create mode 100644 arch/loongarch/kernel/machine_kexec.c
 create mode 100644 arch/loongarch/kernel/relocate_kernel.S

-- 
2.36.0


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/3] LoongArch: Add kexec support
  2022-08-29  4:37 [PATCH 0/3] LoongArch: Add kexec/kdump support Youling Tang
@ 2022-08-29  4:37 ` Youling Tang
  2022-08-30  1:53   ` Jinyang He
  2022-09-05  1:01   ` Youling Tang
  2022-08-29  4:37 ` [PATCH 2/3] LoongArch: Add kdump support Youling Tang
  2022-08-29  4:37 ` [PATCH 3/3] LoongArch: Enable CONFIG_KEXEC Youling Tang
  2 siblings, 2 replies; 20+ messages in thread
From: Youling Tang @ 2022-08-29  4:37 UTC (permalink / raw)
  To: Huacai Chen, Baoquan He, Eric Biederman
  Cc: WANG Xuerui, Vivek Goyal, Dave Young, Guo Ren, Jiaxun Yang,
	kexec, loongarch, linux-kernel

Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S, to the
LoongArch architecture to add support for the kexec reboot mechanism
(CONFIG_KEXEC) on LoongArch platforms.

Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
PE format.

I tested this on a LoongArch 3A5000 machine and it works as expected:

 $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
 $ sudo kexec -e

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
 arch/loongarch/Kconfig                  |  11 ++
 arch/loongarch/include/asm/kexec.h      |  58 ++++++++
 arch/loongarch/kernel/Makefile          |   2 +
 arch/loongarch/kernel/head.S            |   7 +-
 arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
 arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
 6 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 arch/loongarch/include/asm/kexec.h
 create mode 100644 arch/loongarch/kernel/machine_kexec.c
 create mode 100644 arch/loongarch/kernel/relocate_kernel.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 45364cffc793..903c82fa958d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
 	  The page size is not necessarily 4KB.  Keep this in mind
 	  when choosing a value for this option.
 
+config KEXEC
+	bool "Kexec system call"
+	select KEXEC_CORE
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h
new file mode 100644
index 000000000000..5c9e7b5eccb8
--- /dev/null
+++ b/arch/loongarch/include/asm/kexec.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kexec.h for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_KEXEC_H
+#define _ASM_KEXEC_H
+
+#include <asm/stacktrace.h>
+#include <asm/page.h>
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+ /* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Reserve a page for the control code buffer */
+#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	if (oldregs)
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	else
+		prepare_frametrace(newregs);
+}
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	unsigned long boot_flag;
+	unsigned long fdt_addr;
+};
+
+typedef void (*do_kexec_t)(unsigned long boot_flag,
+			   unsigned long fdt_addr,
+			   unsigned long first_ind_entry,
+			   unsigned long jump_addr);
+
+struct kimage;
+extern const unsigned char relocate_new_kernel[];
+extern const size_t relocate_new_kernel_size;
+
+#ifdef CONFIG_SMP
+extern atomic_t kexec_ready_to_reboot;
+extern const unsigned char kexec_smp_wait[];
+extern void kexec_reboot(void);
+#endif
+
+#endif /* !_ASM_KEXEC_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index a213e994db68..20b64ac3f128 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
+obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
+
 obj-$(CONFIG_PROC_FS)		+= proc.o
 
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 01bac62a6442..22bdf4928325 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -20,7 +20,12 @@
 
 _head:
 	.word	MZ_MAGIC		/* "MZ", MS-DOS header */
-	.org	0x3c			/* 0x04 ~ 0x3b reserved */
+	.org	0x8
+	.quad	0			/* Image load offset from start of RAM */
+	.dword	_end - _text		/* Effective size of kernel image */
+	.quad	0
+	.dword	kernel_entry		/* Kernel entry point */
+	.org	0x3c			/* 0x28 ~ 0x3b reserved */
 	.long	pe_header - _head	/* Offset to the PE header */
 
 pe_header:
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
new file mode 100644
index 000000000000..4ffcd4cd9c8c
--- /dev/null
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * machine_kexec.c for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
+
+#include <asm/bootinfo.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* 0x100000 ~ 0x200000 is safe */
+#define KEXEC_CTRL_CODE	TO_CACHE(0x100000UL)
+#define KEXEC_BLOB_ADDR	TO_CACHE(0x108000UL)
+
+static unsigned long reboot_code_buffer;
+#ifdef CONFIG_SMP
+void (*relocated_kexec_smp_wait)(void *);
+atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
+#endif
+
+static unsigned long jump_addr;
+static unsigned long first_ind_entry;
+static unsigned long boot_flag;
+static unsigned long fdt_addr;
+
+static void kexec_image_info(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_debug("kexec kimage info:\n");
+	pr_debug("\ttype:        %d\n", kimage->type);
+	pr_debug("\tstart:       %lx\n", kimage->start);
+	pr_debug("\thead:        %lx\n", kimage->head);
+	pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz);
+		pr_debug("\t\t0x%lx bytes, %lu pages\n",
+			(unsigned long)kimage->segment[i].memsz,
+			(unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
+	}
+}
+
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	int i;
+	void *dtb = (void *)KEXEC_BLOB_ADDR;
+
+	kexec_image_info(kimage);
+
+	/* Find the Flattened Device Tree */
+	for (i = 0; i < kimage->nr_segments; i++) {
+		if (!fdt_check_header(kimage->segment[i].buf)) {
+			memcpy(dtb, kimage->segment[i].buf, SZ_64K);
+			kimage->arch.boot_flag = fw_arg0;
+			kimage->arch.fdt_addr = (unsigned long) dtb;
+			break;
+		}
+		continue;
+	}
+
+	/* kexec need a safe page to save reboot_code_buffer */
+	kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
+
+	reboot_code_buffer =
+	  (unsigned long)page_address(kimage->control_code_page);
+	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+	       relocate_new_kernel_size);
+
+	/* All secondary cpus now may jump to kexec_smp_wait cycle */
+	relocated_kexec_smp_wait = reboot_code_buffer +
+		(void *)(kexec_smp_wait - relocate_new_kernel);
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+}
+
+#ifdef CONFIG_SMP
+void kexec_reboot(void)
+{
+	do_kexec_t do_kexec = NULL;
+
+	/* All secondary cpus go to kexec_smp_wait */
+	if (smp_processor_id() > 0) {
+		relocated_kexec_smp_wait(NULL);
+		unreachable();
+	}
+
+	do_kexec = (void *)reboot_code_buffer;
+	do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
+
+	unreachable();
+}
+
+static void kexec_shutdown_secondary(void *)
+{
+	local_irq_disable();
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+
+	kexec_reboot();
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+#endif
+
+void machine_shutdown(void)
+{
+	smp_call_function(kexec_shutdown_secondary, NULL, 0);
+}
+
+void machine_kexec(struct kimage *image)
+{
+	unsigned long entry;
+	unsigned long *ptr;
+	struct kimage_arch *internal = &image->arch;
+
+	boot_flag = internal->boot_flag;
+	fdt_addr = internal->fdt_addr;
+
+	jump_addr = (unsigned long)phys_to_virt(image->start);
+
+	first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
+
+	/*
+	 * The generic kexec code builds a page list with physical
+	 * addresses. they are directly accessible through XKPRANGE
+	 * hence the phys_to_virt() call.
+	 */
+	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
+	     ptr = (entry & IND_INDIRECTION) ?
+	       phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
+		if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
+		    *ptr & IND_DESTINATION)
+			*ptr = (unsigned long) phys_to_virt(*ptr);
+	}
+
+	/* Mark offline before disabling local irq. */
+	set_cpu_online(smp_processor_id(), false);
+
+	/* we do not want to be bothered. */
+	local_irq_disable();
+
+	pr_notice("Will call new kernel at %lx\n", jump_addr);
+	pr_notice("FDT image at %lx\n", fdt_addr);
+	pr_notice("Bye ...\n");
+
+	/* Make reboot code buffer available to the boot CPU. */
+	flush_cache_all();
+
+	atomic_set(&kexec_ready_to_reboot, 1);
+
+	/*
+	 * We know we were online, and there will be no incoming IPIs at
+	 * this point.
+	 */
+	set_cpu_online(smp_processor_id(), true);
+
+	/* Ensure remote CPUs observe that we're online before rebooting. */
+	smp_mb__after_atomic();
+
+	kexec_reboot();
+}
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..d1f242f74ea8
--- /dev/null
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * relocate_kernel.S for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+#include <asm/loongarch.h>
+#include <asm/stackframe.h>
+#include <asm/addrspace.h>
+
+#define IPI_REG_BASE 0x1fe01000
+
+SYM_CODE_START(relocate_new_kernel)
+	/*
+	 * s0: Boot flag passed to the new kernel
+	 * s1: Virt address of the FDT image
+	 * s2: Pointer to the current entry
+	 * s3: Virt address to jump to after relocation
+	 */
+	move		s0, a0
+	move		s1, a1
+	move		s2, a2
+	move		s3, a3
+
+process_entry:
+	PTR_L		s4, s2, 0
+	PTR_ADDI	s2, s2, SZREG
+
+	/* destination page */
+	andi		s5, s4, IND_DESTINATION
+	beqz		s5, 1f
+	li.w		t0, ~0x1
+	and		s6, s4, t0	/* store destination addr in s6 */
+	b		process_entry
+
+1:
+	/* indirection page, update s2	*/
+	andi		s5, s4, IND_INDIRECTION
+	beqz		s5, 1f
+	li.w		t0, ~0x2
+	and		s2, s4, t0
+	b		process_entry
+
+1:
+	/* done page */
+	andi		s5, s4, IND_DONE
+	beqz		s5, 1f
+	b		done
+1:
+	/* source page */
+	andi		s5, s4, IND_SOURCE
+	beqz		s5, process_entry
+	li.w		t0, ~0x8
+	and		s4, s4, t0
+	li.w		s8, (1 << _PAGE_SHIFT) / SZREG
+
+copy_word:
+	/* copy page word by word */
+	REG_L		s7, s4, 0
+	REG_S		s7, s6, 0
+	PTR_ADDI	s6, s6, SZREG
+	PTR_ADDI	s4, s4, SZREG
+	LONG_ADDI	s8, s8, -1
+	beqz		s8, process_entry
+	b		copy_word
+	b		process_entry
+
+done:
+	dbar		0
+
+	move		a0, s0
+	move		a1, s1
+	/* jump to the new kernel */
+	jr		s3
+SYM_CODE_END(relocate_new_kernel)
+
+#ifdef CONFIG_SMP
+/*
+ * Other CPUs should wait until code is relocated and
+ * then start at entry (?) point.
+ */
+SYM_CODE_START(kexec_smp_wait)
+	li.d		t0, IPI_REG_BASE
+	li.d		t1, UNCACHE_BASE
+	or		t0, t0, t1
+
+	/*
+	 * s1:initfn
+	 * t0:base t1:cpuid t2:node t3:core t4:count
+	 */
+	csrrd		t1, LOONGARCH_CSR_CPUID
+	andi		t1, t1, CSR_CPUID_COREID
+	andi		t3, t1, 0x3
+	slli.w		t3, t3, 8              /* get core id */
+	or		t0, t0, t3
+	andi		t2, t1, 0x3c
+	slli.d		t2, t2, 42             /* get node id */
+	or		t0, t0, t2
+
+1:	li.w		t4, 0x100              /* wait for init loop */
+2:	addi.w		t4, t4, -1             /* limit mailbox access */
+	bnez		t4, 2b
+	ld.w		s1, t0, 0x20           /* check PC as an indicator */
+	beqz		s1, 1b
+	ld.d		s1, t0, 0x20           /* get PC via mailbox */
+	ld.d		sp, t0, 0x28           /* get SP via mailbox */
+	ld.d		tp, t0, 0x30           /* get TP via mailbox */
+
+	li.d		t0, CACHE_BASE
+	or		s1, s1, t0
+	jr		s1                     /* jump to initial PC */
+SYM_CODE_END(kexec_smp_wait)
+#endif
+
+relocate_new_kernel_end:
+
+SYM_DATA_START(relocate_new_kernel_size)
+	PTR		relocate_new_kernel_end - relocate_new_kernel
+SYM_DATA_END(relocate_new_kernel_size)
-- 
2.36.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] LoongArch: Add kdump support
  2022-08-29  4:37 [PATCH 0/3] LoongArch: Add kexec/kdump support Youling Tang
  2022-08-29  4:37 ` [PATCH 1/3] LoongArch: Add kexec support Youling Tang
@ 2022-08-29  4:37 ` Youling Tang
  2022-09-04 12:21   ` Huacai Chen
  2022-08-29  4:37 ` [PATCH 3/3] LoongArch: Enable CONFIG_KEXEC Youling Tang
  2 siblings, 1 reply; 20+ messages in thread
From: Youling Tang @ 2022-08-29  4:37 UTC (permalink / raw)
  To: Huacai Chen, Baoquan He, Eric Biederman
  Cc: WANG Xuerui, Vivek Goyal, Dave Young, Guo Ren, Jiaxun Yang,
	kexec, loongarch, linux-kernel

This patch adds support for kdump: the kernel will reserve a region
for the crash kernel and jump there on panic.

Arch-specific functions are added to allow for implementing a crash
dump file interface, /proc/vmcore, which can be viewed as an ELF file.

A user space tool, like kexec-tools, is responsible for allocating a
separate region for the core's ELF header within crash kdump kernel
memory and filling it in when executing kexec_load().

Then, its location will be advertised to crash dump kernel via a new
device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
the region for later use with fdt_reserve_elfcorehdr() at boot time.

At the same time, it will also limit the crash kdump kernel to the
crashkernel area via a new device-tree property, "linux,usable-memory-range",
so as not to destroy the original kernel dump data.

On crash dump kernel, /proc/vmcore will access the primary kernel's memory
with copy_oldmem_page().

I tested this on a LoongArch 3A5000 machine and it works as expected (the
suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
it by triggering a crash through /proc/sysrq-trigger:

 $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
 # echo c > /proc/sysrq-trigger

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
 arch/loongarch/Kconfig                  |  22 ++++++
 arch/loongarch/Makefile                 |   4 +
 arch/loongarch/kernel/Makefile          |   3 +-
 arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
 arch/loongarch/kernel/crash_dump.c      |  19 +++++
 arch/loongarch/kernel/machine_kexec.c   |  12 ++-
 arch/loongarch/kernel/mem.c             |   6 ++
 arch/loongarch/kernel/relocate_kernel.S |   6 ++
 arch/loongarch/kernel/setup.c           |  49 ++++++++++++
 arch/loongarch/kernel/traps.c           |   4 +
 10 files changed, 217 insertions(+), 8 deletions(-)
 create mode 100644 arch/loongarch/kernel/crash.c
 create mode 100644 arch/loongarch/kernel/crash_dump.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 903c82fa958d..7c1b07a5b5bd 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -420,6 +420,28 @@ config KEXEC
 
 	  The name comes from the similarity to the exec system call.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate crash dump after being started by kexec. This should
+	  be normally only set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded"
+	default "0x9000000091000000" if 64BIT
+	depends on CRASH_DUMP
+	help
+	  This gives the XKPRANGE address where the kernel is loaded.
+	  If you plan to use kernel for capturing the crash dump change
+	  this value to start of the reserved region (the "X" value as
+	  specified in the "crashkernel=YM@XM" command line boot parameter
+	  passed to the panic-ed kernel).
+
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 4bc47f47cfd8..7dabd580426d 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
 
+ifdef CONFIG_PHYSICAL_START
+load-y		= $(CONFIG_PHYSICAL_START)
+else
 load-y		= 0x9000000000200000
+endif
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y)
 
 drivers-$(CONFIG_PCI)		+= arch/loongarch/pci/
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 20b64ac3f128..df5aea129364 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
-obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
 
 obj-$(CONFIG_PROC_FS)		+= proc.o
 
diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
new file mode 100644
index 000000000000..b4f249ec6301
--- /dev/null
+++ b/arch/loongarch/kernel/crash.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS
+ */
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/cacheflush.h>
+#include <asm/kexec.h>
+
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
+
+#ifdef CONFIG_SMP
+static void crash_shutdown_secondary(void *passed_regs)
+{
+	struct pt_regs *regs = passed_regs;
+	int cpu = smp_processor_id();
+
+	/*
+	 * If we are passed registers, use those.  Otherwise get the
+	 * regs from the last interrupt, which should be correct, as
+	 * we are in an interrupt.  But if the regs are not there,
+	 * pull them from the top of the stack.  They are probably
+	 * wrong, but we need something to keep from crashing again.
+	 */
+	if (!regs)
+		regs = get_irq_regs();
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	local_irq_disable();
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+		crash_save_cpu(regs, cpu);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
+
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+
+	kexec_reboot();
+}
+
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+	static int cpus_stopped;
+	unsigned long timeout;
+	unsigned int ncpus;
+
+	/*
+	 * This function can be called twice in panic path, but obviously
+	 * we execute this only once.
+	 */
+	if (cpus_stopped)
+		return;
+
+	cpus_stopped = 1;
+
+	 /* Excluding the panic cpu */
+	ncpus = num_online_cpus() - 1;
+
+	smp_call_function(crash_shutdown_secondary, NULL, 0);
+	smp_wmb();
+
+	/*
+	 * The crash CPU sends an IPI and wait for other CPUs to
+	 * respond. Delay of at least 10 seconds.
+	 */
+	pr_emerg("Sending IPI to other cpus...\n");
+	timeout = USEC_PER_SEC * 10;
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
+		cpu_relax();
+		udelay(1);
+	}
+}
+
+#endif
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	int crashing_cpu;
+
+	local_irq_disable();
+
+	crashing_cpu = smp_processor_id();
+	crash_save_cpu(regs, crashing_cpu);
+
+	/* shutdown non-crashing cpus */
+	crash_smp_send_stop();
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+	pr_info("Starting crashdump kernel...\n");
+}
diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
new file mode 100644
index 000000000000..13e5d2f7870d
--- /dev/null
+++ b/arch/loongarch/kernel/crash_dump.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/highmem.h>
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+			 size_t csize, unsigned long offset)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = kmap_local_pfn(pfn);
+	csize = copy_to_iter(vaddr + offset, csize, iter);
+	kunmap_local(vaddr);
+
+	return csize;
+}
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index 4ffcd4cd9c8c..f793a3ff09a3 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
 		continue;
 	}
 
-	/* kexec need a safe page to save reboot_code_buffer */
+	/* kexec/kdump need a safe page to save reboot_code_buffer */
 	kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
 
 	reboot_code_buffer =
@@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
 
 	kexec_reboot();
 }
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-}
 #endif
 
 void machine_shutdown(void)
@@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
 
 	jump_addr = (unsigned long)phys_to_virt(image->start);
 
-	first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
 
 	/*
 	 * The generic kexec code builds a page list with physical
@@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
 
 	/*
 	 * We know we were online, and there will be no incoming IPIs at
-	 * this point.
+	 * this point. Mark online again before rebooting so that the crash
+	 * analysis tool will see us correctly.
 	 */
 	set_cpu_online(smp_processor_id(), true);
 
diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
index 7423361b0ebc..c6def6ff81c8 100644
--- a/arch/loongarch/kernel/mem.c
+++ b/arch/loongarch/kernel/mem.c
@@ -5,6 +5,7 @@
 #include <linux/efi.h>
 #include <linux/initrd.h>
 #include <linux/memblock.h>
+#include <linux/of_fdt.h>
 
 #include <asm/bootinfo.h>
 #include <asm/loongson.h>
@@ -61,4 +62,9 @@ void __init memblock_init(void)
 
 	/* Reserve the initrd */
 	reserve_initrd_mem();
+
+	/* Mainly reserved memory for the elf core head */
+	early_init_fdt_scan_reserved_mem();
+	/* Parse linux,usable-memory-range is for crash dump kernel */
+	early_init_dt_check_for_usable_mem_range();
 }
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index d1f242f74ea8..4ee5ac4ac2d7 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
 	move		s2, a2
 	move		s3, a3
 
+	/*
+	 * In case of a kdump/crash kernel, the indirection page is not
+	 * populated as the kernel is directly copied to a reserved location
+	 */
+	beqz		s2, done
+
 process_entry:
 	PTR_L		s4, s2, 0
 	PTR_ADDI	s2, s2, SZREG
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index f938aae3e92c..ea34b77e402f 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -19,6 +19,8 @@
 #include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <linux/ioport.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/pfn.h>
@@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
+static void __init loongarch_parse_crashkernel(void)
+{
+#ifdef CONFIG_KEXEC
+	unsigned long long start;
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = memblock_phys_mem_size();
+	ret = parse_crashkernel(boot_command_line, total_mem,
+				&crash_size, &crash_base);
+	if (ret != 0 || crash_size <= 0)
+		return;
+
+
+	start = memblock_phys_alloc_range(crash_size, 1, crash_base,
+					crash_base + crash_size);
+	if (start != crash_base) {
+		pr_warn("Invalid memory region reserved for crash kernel\n");
+		return;
+	}
+
+	crashk_res.start = crash_base;
+	crashk_res.end	 = crash_base + crash_size - 1;
+#endif
+}
+
+static void __init request_crashkernel(struct resource *res)
+{
+#ifdef CONFIG_KEXEC
+	int ret;
+
+	if (crashk_res.start == crashk_res.end)
+		return;
+
+	ret = request_resource(res, &crashk_res);
+	if (!ret)
+		pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
+			(unsigned long)((crashk_res.end -
+					 crashk_res.start + 1) >> 20),
+			(unsigned long)(crashk_res.start  >> 20));
+#endif
+}
+
 void __init platform_init(void)
 {
 	efi_init();
@@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
 
 	check_kernel_sections_mem();
 
+	loongarch_parse_crashkernel();
+
 	/*
 	 * In order to reduce the possibility of kernel panic when failed to
 	 * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
@@ -290,6 +338,7 @@ static void __init resource_init(void)
 		request_resource(res, &code_resource);
 		request_resource(res, &data_resource);
 		request_resource(res, &bss_resource);
+		request_crashkernel(res);
 	}
 }
 
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index aa1c95aaf595..0e610872f3f4 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -10,6 +10,7 @@
 #include <linux/entry-common.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/extable.h>
 #include <linux/mm.h>
@@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
 	oops_exit();
 
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 
-- 
2.36.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/3] LoongArch: Enable CONFIG_KEXEC
  2022-08-29  4:37 [PATCH 0/3] LoongArch: Add kexec/kdump support Youling Tang
  2022-08-29  4:37 ` [PATCH 1/3] LoongArch: Add kexec support Youling Tang
  2022-08-29  4:37 ` [PATCH 2/3] LoongArch: Add kdump support Youling Tang
@ 2022-08-29  4:37 ` Youling Tang
  2 siblings, 0 replies; 20+ messages in thread
From: Youling Tang @ 2022-08-29  4:37 UTC (permalink / raw)
  To: Huacai Chen, Baoquan He, Eric Biederman
  Cc: WANG Xuerui, Vivek Goyal, Dave Young, Guo Ren, Jiaxun Yang,
	kexec, loongarch, linux-kernel

Enable CONFIG_KEXEC by default to make kexec operations convenient.

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
 arch/loongarch/configs/loongson3_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 68c9609670d4..52db7a3a79f3 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -45,6 +45,7 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_KEXEC=y
 CONFIG_PAGE_SIZE_16KB=y
 CONFIG_HZ_250=y
 CONFIG_ACPI=y
-- 
2.36.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] LoongArch: Add kexec support
  2022-08-29  4:37 ` [PATCH 1/3] LoongArch: Add kexec support Youling Tang
@ 2022-08-30  1:53   ` Jinyang He
  2022-08-30  3:25     ` Youling Tang
  2022-09-05  1:01   ` Youling Tang
  1 sibling, 1 reply; 20+ messages in thread
From: Jinyang He @ 2022-08-30  1:53 UTC (permalink / raw)
  To: Youling Tang, Huacai Chen, Baoquan He, Eric Biederman
  Cc: WANG Xuerui, Vivek Goyal, Dave Young, Guo Ren, Jiaxun Yang,
	kexec, loongarch, linux-kernel

Hi, Youling,


On 08/29/2022 12:37 PM, Youling Tang wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
> LoongArch architecture that add support for the kexec re-boot mechanis
> (CONFIG_KEXEC) on LoongArch platforms.
>
> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
> PE format.
>
> I tested this on  LoongArch 3A5000 machine and works as expected,
>
>   $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
>   $ sudo kexec -e
>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> ---
>   arch/loongarch/Kconfig                  |  11 ++
>   arch/loongarch/include/asm/kexec.h      |  58 ++++++++
>   arch/loongarch/kernel/Makefile          |   2 +
>   arch/loongarch/kernel/head.S            |   7 +-
>   arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
>   arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
>   6 files changed, 380 insertions(+), 1 deletion(-)
>   create mode 100644 arch/loongarch/include/asm/kexec.h
>   create mode 100644 arch/loongarch/kernel/machine_kexec.c
>   create mode 100644 arch/loongarch/kernel/relocate_kernel.S
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 45364cffc793..903c82fa958d 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
>   	  The page size is not necessarily 4KB.  Keep this in mind
>   	  when choosing a value for this option.
>   
> +config KEXEC
> +	bool "Kexec system call"
> +	select KEXEC_CORE
> +	help
> +	  kexec is a system call that implements the ability to shutdown your
> +	  current kernel, and to start another kernel.  It is like a reboot
> +	  but it is independent of the system firmware.   And like a reboot
> +	  you can start any kernel with it, not just Linux.
> +
> +	  The name comes from the similarity to the exec system call.
> +
>   config SECCOMP
>   	bool "Enable seccomp to safely compute untrusted bytecode"
>   	depends on PROC_FS
> diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h
> new file mode 100644
> index 000000000000..5c9e7b5eccb8
> --- /dev/null
> +++ b/arch/loongarch/include/asm/kexec.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * kexec.h for kexec
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#ifndef _ASM_KEXEC_H
> +#define _ASM_KEXEC_H
> +
> +#include <asm/stacktrace.h>
> +#include <asm/page.h>
> +
> +/* Maximum physical address we can use pages from */
> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> +/* Maximum address we can reach in physical address mode */
> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> + /* Maximum address we can use for the control code buffer */
> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> +
> +/* Reserve a page for the control code buffer */
> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
> +
> +/* The native architecture */
> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
> +
> +static inline void crash_setup_regs(struct pt_regs *newregs,
> +				    struct pt_regs *oldregs)
> +{
> +	if (oldregs)
> +		memcpy(newregs, oldregs, sizeof(*newregs));
> +	else
> +		prepare_frametrace(newregs);
> +}
> +
> +#define ARCH_HAS_KIMAGE_ARCH
> +
> +struct kimage_arch {
> +	unsigned long boot_flag;
> +	unsigned long fdt_addr;
> +};
> +
> +typedef void (*do_kexec_t)(unsigned long boot_flag,
> +			   unsigned long fdt_addr,
> +			   unsigned long first_ind_entry,
> +			   unsigned long jump_addr);
> +
> +struct kimage;
> +extern const unsigned char relocate_new_kernel[];
> +extern const size_t relocate_new_kernel_size;
> +
> +#ifdef CONFIG_SMP
> +extern atomic_t kexec_ready_to_reboot;
> +extern const unsigned char kexec_smp_wait[];
> +extern void kexec_reboot(void);
> +#endif
> +
> +#endif /* !_ASM_KEXEC_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index a213e994db68..20b64ac3f128 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
>   obj-$(CONFIG_MODULES)		+= module.o module-sections.o
>   obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
>   
> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> +
>   obj-$(CONFIG_PROC_FS)		+= proc.o
>   
>   obj-$(CONFIG_SMP)		+= smp.o
> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
> index 01bac62a6442..22bdf4928325 100644
> --- a/arch/loongarch/kernel/head.S
> +++ b/arch/loongarch/kernel/head.S
> @@ -20,7 +20,12 @@
>   
>   _head:
>   	.word	MZ_MAGIC		/* "MZ", MS-DOS header */
> -	.org	0x3c			/* 0x04 ~ 0x3b reserved */
> +	.org	0x8
> +	.quad	0			/* Image load offset from start of RAM */
> +	.dword	_end - _text		/* Effective size of kernel image */
> +	.quad	0
> +	.dword	kernel_entry		/* Kernel entry point */
> +	.org	0x3c			/* 0x28 ~ 0x3b reserved */
>   	.long	pe_header - _head	/* Offset to the PE header */
>   
>   pe_header:
> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> new file mode 100644
> index 000000000000..4ffcd4cd9c8c
> --- /dev/null
> +++ b/arch/loongarch/kernel/machine_kexec.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * machine_kexec.c for kexec
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +#include <linux/compiler.h>
> +#include <linux/cpu.h>
> +#include <linux/kexec.h>
> +#include <linux/mm.h>
> +#include <linux/delay.h>
> +#include <linux/libfdt.h>
> +#include <linux/of_fdt.h>
> +
> +#include <asm/bootinfo.h>
> +#include <asm/cacheflush.h>
> +#include <asm/page.h>
> +
> +/* 0x100000 ~ 0x200000 is safe */
> +#define KEXEC_CTRL_CODE	TO_CACHE(0x100000UL)
> +#define KEXEC_BLOB_ADDR	TO_CACHE(0x108000UL)
> +
> +static unsigned long reboot_code_buffer;
> +#ifdef CONFIG_SMP
> +void (*relocated_kexec_smp_wait)(void *);
> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
> +#endif
> +
> +static unsigned long jump_addr;
> +static unsigned long first_ind_entry;
> +static unsigned long boot_flag;
> +static unsigned long fdt_addr;
> +
> +static void kexec_image_info(const struct kimage *kimage)
> +{
> +	unsigned long i;
> +
> +	pr_debug("kexec kimage info:\n");
> +	pr_debug("\ttype:        %d\n", kimage->type);
> +	pr_debug("\tstart:       %lx\n", kimage->start);
> +	pr_debug("\thead:        %lx\n", kimage->head);
> +	pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
> +
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
> +			kimage->segment[i].mem,
> +			kimage->segment[i].mem + kimage->segment[i].memsz);
> +		pr_debug("\t\t0x%lx bytes, %lu pages\n",
> +			(unsigned long)kimage->segment[i].memsz,
> +			(unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
> +	}
> +}
> +
> +int machine_kexec_prepare(struct kimage *kimage)
> +{
> +	int i;
> +	void *dtb = (void *)KEXEC_BLOB_ADDR;
> +
> +	kexec_image_info(kimage);
> +
> +	/* Find the Flattened Device Tree */
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		if (!fdt_check_header(kimage->segment[i].buf)) {
> +			memcpy(dtb, kimage->segment[i].buf, SZ_64K);
> +			kimage->arch.boot_flag = fw_arg0;
> +			kimage->arch.fdt_addr = (unsigned long) dtb;
> +			break;
> +		}
> +		continue;
> +	}
> +
> +	/* kexec need a safe page to save reboot_code_buffer */
> +	kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> +
> +	reboot_code_buffer =
> +	  (unsigned long)page_address(kimage->control_code_page);
> +	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
> +	       relocate_new_kernel_size);
It copies the same content to KEXEC_CTRL_CODE each time; could we do this at
boot time?

BTW, our system always keeps the low 2MB unused, on mips-loongson or
LoongArch. Is that necessary on LoongArch? We cannot use the parameter
'mem=YYM' normally, but 'mem=YYM@2M' is OK. And the low 2MB is not under
virtual memory management, although we can access it in the kernel.

In the kexec/kdump process, we can follow kimage_alloc_control_pages().
When the boot CPU has finished copying the second kernel, all CPUs can jump
to a kernel-entry trampoline which is in the kernel image. Then we don't
need to worry about the code being destroyed. The kernel-entry trampoline
gets its cpuid, keeps the non-boot CPUs waiting as kexec_smp_wait does, and
lets the boot CPU go to kernel-entry. In this way we can drop the low 2MB, IMO.

> +
> +	/* All secondary cpus now may jump to kexec_smp_wait cycle */
> +	relocated_kexec_smp_wait = reboot_code_buffer +
> +		(void *)(kexec_smp_wait - relocate_new_kernel);
> +
> +	return 0;
> +}
> +
> +void machine_kexec_cleanup(struct kimage *kimage)
> +{
> +}
> +
> +#ifdef CONFIG_SMP
> +void kexec_reboot(void)
> +{
> +	do_kexec_t do_kexec = NULL;
> +
> +	/* All secondary cpus go to kexec_smp_wait */
> +	if (smp_processor_id() > 0) {
> +		relocated_kexec_smp_wait(NULL);
> +		unreachable();
> +	}
> +
> +	do_kexec = (void *)reboot_code_buffer;
> +	do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
> +
> +	unreachable();
> +}
> +
> +static void kexec_shutdown_secondary(void *)
> +{
> +	local_irq_disable();
> +	while (!atomic_read(&kexec_ready_to_reboot))
> +		cpu_relax();
> +
> +	kexec_reboot();
> +}
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +}
> +#endif
> +
> +void machine_shutdown(void)
> +{
> +	smp_call_function(kexec_shutdown_secondary, NULL, 0);
> +}
> +
> +void machine_kexec(struct kimage *image)
> +{
> +	unsigned long entry;
> +	unsigned long *ptr;
> +	struct kimage_arch *internal = &image->arch;
> +
> +	boot_flag = internal->boot_flag;
> +	fdt_addr = internal->fdt_addr;
> +
> +	jump_addr = (unsigned long)phys_to_virt(image->start);
> +
> +	first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> +
> +	/*
> +	 * The generic kexec code builds a page list with physical
> +	 * addresses. they are directly accessible through XKPRANGE
> +	 * hence the phys_to_virt() call.
> +	 */
> +	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
> +	     ptr = (entry & IND_INDIRECTION) ?
> +	       phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
> +		if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
> +		    *ptr & IND_DESTINATION)
> +			*ptr = (unsigned long) phys_to_virt(*ptr);
> +	}
> +
> +	/* Mark offline before disabling local irq. */
> +	set_cpu_online(smp_processor_id(), false);
> +
> +	/* we do not want to be bothered. */
> +	local_irq_disable();
> +
> +	pr_notice("Will call new kernel at %lx\n", jump_addr);
> +	pr_notice("FDT image at %lx\n", fdt_addr);
> +	pr_notice("Bye ...\n");
> +
> +	/* Make reboot code buffer available to the boot CPU. */
> +	flush_cache_all();
> +
> +	atomic_set(&kexec_ready_to_reboot, 1);
> +
> +	/*
> +	 * We know we were online, and there will be no incoming IPIs at
> +	 * this point.
> +	 */
> +	set_cpu_online(smp_processor_id(), true);
> +
> +	/* Ensure remote CPUs observe that we're online before rebooting. */
> +	smp_mb__after_atomic();
> +
> +	kexec_reboot();
> +}
> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> new file mode 100644
> index 000000000000..d1f242f74ea8
> --- /dev/null
> +++ b/arch/loongarch/kernel/relocate_kernel.S
> @@ -0,0 +1,125 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * relocate_kernel.S for kexec
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/kexec.h>
> +
> +#include <asm/asm.h>
> +#include <asm/asmmacro.h>
> +#include <asm/regdef.h>
> +#include <asm/loongarch.h>
> +#include <asm/stackframe.h>
> +#include <asm/addrspace.h>
> +
> +#define IPI_REG_BASE 0x1fe01000
> +
> +SYM_CODE_START(relocate_new_kernel)
> +	/*
> +	 * s0: Boot flag passed to the new kernel
> +	 * s1: Virt address of the FDT image
> +	 * s2: Pointer to the current entry
> +	 * s3: Virt address to jump to after relocation
> +	 */
> +	move		s0, a0
> +	move		s1, a1
> +	move		s2, a2
> +	move		s3, a3
> +
> +process_entry:
> +	PTR_L		s4, s2, 0
> +	PTR_ADDI	s2, s2, SZREG
> +
> +	/* destination page */
> +	andi		s5, s4, IND_DESTINATION
> +	beqz		s5, 1f
> +	li.w		t0, ~0x1
> +	and		s6, s4, t0	/* store destination addr in s6 */
> +	b		process_entry
> +
> +1:
> +	/* indirection page, update s2	*/
> +	andi		s5, s4, IND_INDIRECTION
> +	beqz		s5, 1f
> +	li.w		t0, ~0x2
> +	and		s2, s4, t0
> +	b		process_entry
> +
> +1:
> +	/* done page */
> +	andi		s5, s4, IND_DONE
> +	beqz		s5, 1f
> +	b		done
> +1:
> +	/* source page */
> +	andi		s5, s4, IND_SOURCE
> +	beqz		s5, process_entry
> +	li.w		t0, ~0x8
> +	and		s4, s4, t0
> +	li.w		s8, (1 << _PAGE_SHIFT) / SZREG
> +
> +copy_word:
> +	/* copy page word by word */
> +	REG_L		s7, s4, 0
> +	REG_S		s7, s6, 0
> +	PTR_ADDI	s6, s6, SZREG
> +	PTR_ADDI	s4, s4, SZREG
> +	LONG_ADDI	s8, s8, -1
> +	beqz		s8, process_entry
> +	b		copy_word
> +	b		process_entry
> +
> +done:
> +	dbar		0
ibar, too?
> +
> +	move		a0, s0
> +	move		a1, s1
> +	/* jump to the new kernel */
> +	jr		s3
> +SYM_CODE_END(relocate_new_kernel)
> +
> +#ifdef CONFIG_SMP
> +/*
> + * Other CPUs should wait until code is relocated and
> + * then start at entry (?) point.
> + */
> +SYM_CODE_START(kexec_smp_wait)
> +	li.d		t0, IPI_REG_BASE
> +	li.d		t1, UNCACHE_BASE
> +	or		t0, t0, t1
> +
> +	/*
> +	 * s1:initfn
> +	 * t0:base t1:cpuid t2:node t3:core t4:count
> +	 */
> +	csrrd		t1, LOONGARCH_CSR_CPUID
> +	andi		t1, t1, CSR_CPUID_COREID
> +	andi		t3, t1, 0x3
> +	slli.w		t3, t3, 8              /* get core id */
> +	or		t0, t0, t3
> +	andi		t2, t1, 0x3c
> +	slli.d		t2, t2, 42             /* get node id */
> +	or		t0, t0, t2
> +
> +1:	li.w		t4, 0x100              /* wait for init loop */
> +2:	addi.w		t4, t4, -1             /* limit mailbox access */
> +	bnez		t4, 2b
> +	ld.w		s1, t0, 0x20           /* check PC as an indicator */
Can we do this with iocsr*?

Thanks,
Jinyang
> +	beqz		s1, 1b
> +	ld.d		s1, t0, 0x20           /* get PC via mailbox */
> +	ld.d		sp, t0, 0x28           /* get SP via mailbox */
> +	ld.d		tp, t0, 0x30           /* get TP via mailbox */
> +
> +	li.d		t0, CACHE_BASE
> +	or		s1, s1, t0
> +	jr		s1                     /* jump to initial PC */
> +SYM_CODE_END(kexec_smp_wait)
> +#endif
> +
> +relocate_new_kernel_end:
> +
> +SYM_DATA_START(relocate_new_kernel_size)
> +	PTR		relocate_new_kernel_end - relocate_new_kernel
> +SYM_DATA_END(relocate_new_kernel_size)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] LoongArch: Add kexec support
  2022-08-30  1:53   ` Jinyang He
@ 2022-08-30  3:25     ` Youling Tang
  2022-08-30  3:42       ` Huacai Chen
  2022-08-31  3:38       ` Youling Tang
  0 siblings, 2 replies; 20+ messages in thread
From: Youling Tang @ 2022-08-30  3:25 UTC (permalink / raw)
  To: Jinyang He, Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, linux-kernel

Hi, Jinyang

On 08/30/2022 09:53 AM, Jinyang He wrote:
> Hi, Youling,
>
>
> On 08/29/2022 12:37 PM, Youling Tang wrote:
>> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to
>> the
>> LoongArch architecture that add support for the kexec re-boot mechanis
>> (CONFIG_KEXEC) on LoongArch platforms.
>>
>> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
>> PE format.
>>
>> I tested this on  LoongArch 3A5000 machine and works as expected,
>>
>>   $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
>>   $ sudo kexec -e
>>
>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>> ---
>>   arch/loongarch/Kconfig                  |  11 ++
>>   arch/loongarch/include/asm/kexec.h      |  58 ++++++++
>>   arch/loongarch/kernel/Makefile          |   2 +
>>   arch/loongarch/kernel/head.S            |   7 +-
>>   arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
>>   arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
>>   6 files changed, 380 insertions(+), 1 deletion(-)
>>   create mode 100644 arch/loongarch/include/asm/kexec.h
>>   create mode 100644 arch/loongarch/kernel/machine_kexec.c
>>   create mode 100644 arch/loongarch/kernel/relocate_kernel.S
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 45364cffc793..903c82fa958d 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
>>         The page size is not necessarily 4KB.  Keep this in mind
>>         when choosing a value for this option.
>>   +config KEXEC
>> +    bool "Kexec system call"
>> +    select KEXEC_CORE
>> +    help
>> +      kexec is a system call that implements the ability to shutdown
>> your
>> +      current kernel, and to start another kernel.  It is like a reboot
>> +      but it is independent of the system firmware.   And like a reboot
>> +      you can start any kernel with it, not just Linux.
>> +
>> +      The name comes from the similarity to the exec system call.
>> +
>>   config SECCOMP
>>       bool "Enable seccomp to safely compute untrusted bytecode"
>>       depends on PROC_FS
>> diff --git a/arch/loongarch/include/asm/kexec.h
>> b/arch/loongarch/include/asm/kexec.h
>> new file mode 100644
>> index 000000000000..5c9e7b5eccb8
>> --- /dev/null
>> +++ b/arch/loongarch/include/asm/kexec.h
>> @@ -0,0 +1,58 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * kexec.h for kexec
>> + *
>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>> + */
>> +
>> +#ifndef _ASM_KEXEC_H
>> +#define _ASM_KEXEC_H
>> +
>> +#include <asm/stacktrace.h>
>> +#include <asm/page.h>
>> +
>> +/* Maximum physical address we can use pages from */
>> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
>> +/* Maximum address we can reach in physical address mode */
>> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
>> + /* Maximum address we can use for the control code buffer */
>> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
>> +
>> +/* Reserve a page for the control code buffer */
>> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
>> +
>> +/* The native architecture */
>> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
>> +
>> +static inline void crash_setup_regs(struct pt_regs *newregs,
>> +                    struct pt_regs *oldregs)
>> +{
>> +    if (oldregs)
>> +        memcpy(newregs, oldregs, sizeof(*newregs));
>> +    else
>> +        prepare_frametrace(newregs);
>> +}
>> +
>> +#define ARCH_HAS_KIMAGE_ARCH
>> +
>> +struct kimage_arch {
>> +    unsigned long boot_flag;
>> +    unsigned long fdt_addr;
>> +};
>> +
>> +typedef void (*do_kexec_t)(unsigned long boot_flag,
>> +               unsigned long fdt_addr,
>> +               unsigned long first_ind_entry,
>> +               unsigned long jump_addr);
>> +
>> +struct kimage;
>> +extern const unsigned char relocate_new_kernel[];
>> +extern const size_t relocate_new_kernel_size;
>> +
>> +#ifdef CONFIG_SMP
>> +extern atomic_t kexec_ready_to_reboot;
>> +extern const unsigned char kexec_smp_wait[];
>> +extern void kexec_reboot(void);
>> +#endif
>> +
>> +#endif /* !_ASM_KEXEC_H */
>> diff --git a/arch/loongarch/kernel/Makefile
>> b/arch/loongarch/kernel/Makefile
>> index a213e994db68..20b64ac3f128 100644
>> --- a/arch/loongarch/kernel/Makefile
>> +++ b/arch/loongarch/kernel/Makefile
>> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)    += fpu.o
>>   obj-$(CONFIG_MODULES)        += module.o module-sections.o
>>   obj-$(CONFIG_STACKTRACE)    += stacktrace.o
>>   +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>> +
>>   obj-$(CONFIG_PROC_FS)        += proc.o
>>     obj-$(CONFIG_SMP)        += smp.o
>> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
>> index 01bac62a6442..22bdf4928325 100644
>> --- a/arch/loongarch/kernel/head.S
>> +++ b/arch/loongarch/kernel/head.S
>> @@ -20,7 +20,12 @@
>>     _head:
>>       .word    MZ_MAGIC        /* "MZ", MS-DOS header */
>> -    .org    0x3c            /* 0x04 ~ 0x3b reserved */
>> +    .org    0x8
>> +    .quad    0            /* Image load offset from start of RAM */
>> +    .dword    _end - _text        /* Effective size of kernel image */
>> +    .quad    0
>> +    .dword    kernel_entry        /* Kernel entry point */
>> +    .org    0x3c            /* 0x28 ~ 0x3b reserved */
>>       .long    pe_header - _head    /* Offset to the PE header */
>>     pe_header:
>> diff --git a/arch/loongarch/kernel/machine_kexec.c
>> b/arch/loongarch/kernel/machine_kexec.c
>> new file mode 100644
>> index 000000000000..4ffcd4cd9c8c
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/machine_kexec.c
>> @@ -0,0 +1,178 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * machine_kexec.c for kexec
>> + *
>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>> + */
>> +#include <linux/compiler.h>
>> +#include <linux/cpu.h>
>> +#include <linux/kexec.h>
>> +#include <linux/mm.h>
>> +#include <linux/delay.h>
>> +#include <linux/libfdt.h>
>> +#include <linux/of_fdt.h>
>> +
>> +#include <asm/bootinfo.h>
>> +#include <asm/cacheflush.h>
>> +#include <asm/page.h>
>> +
>> +/* 0x100000 ~ 0x200000 is safe */
>> +#define KEXEC_CTRL_CODE    TO_CACHE(0x100000UL)
>> +#define KEXEC_BLOB_ADDR    TO_CACHE(0x108000UL)
>> +
>> +static unsigned long reboot_code_buffer;
>> +#ifdef CONFIG_SMP
>> +void (*relocated_kexec_smp_wait)(void *);
>> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
>> +#endif
>> +
>> +static unsigned long jump_addr;
>> +static unsigned long first_ind_entry;
>> +static unsigned long boot_flag;
>> +static unsigned long fdt_addr;
>> +
>> +static void kexec_image_info(const struct kimage *kimage)
>> +{
>> +    unsigned long i;
>> +
>> +    pr_debug("kexec kimage info:\n");
>> +    pr_debug("\ttype:        %d\n", kimage->type);
>> +    pr_debug("\tstart:       %lx\n", kimage->start);
>> +    pr_debug("\thead:        %lx\n", kimage->head);
>> +    pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
>> +
>> +    for (i = 0; i < kimage->nr_segments; i++) {
>> +        pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
>> +            kimage->segment[i].mem,
>> +            kimage->segment[i].mem + kimage->segment[i].memsz);
>> +        pr_debug("\t\t0x%lx bytes, %lu pages\n",
>> +            (unsigned long)kimage->segment[i].memsz,
>> +            (unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
>> +    }
>> +}
>> +
>> +int machine_kexec_prepare(struct kimage *kimage)
>> +{
>> +    int i;
>> +    void *dtb = (void *)KEXEC_BLOB_ADDR;
>> +
>> +    kexec_image_info(kimage);
>> +
>> +    /* Find the Flattened Device Tree */
>> +    for (i = 0; i < kimage->nr_segments; i++) {
>> +        if (!fdt_check_header(kimage->segment[i].buf)) {
>> +            memcpy(dtb, kimage->segment[i].buf, SZ_64K);
>> +            kimage->arch.boot_flag = fw_arg0;
>> +            kimage->arch.fdt_addr = (unsigned long) dtb;
>> +            break;
>> +        }
>> +        continue;
>> +    }
>> +
>> +    /* kexec need a safe page to save reboot_code_buffer */
>> +    kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>> +
>> +    reboot_code_buffer =
>> +      (unsigned long)page_address(kimage->control_code_page);
>> +    memcpy((void *)reboot_code_buffer, relocate_new_kernel,
>> +           relocate_new_kernel_size);
> It copys same content to KEXEC_CTRL_CODE each time, could we do this at
> boot time?
I think it's possible to have the copy action happen at boot-time or
during the prepare phase. (RISCV in prepare, MIPS in boot-time)

>
> BTW, our system always keep the low-2MB no used, on mips-loongson or
> LoongArch. Is that necessary on LoongArch? We cannot use parameter
> 'mem=YYM' normally but 'mem=YYM@2M' is ok. And the low-2MB is not
> in virtual memory management, although we can get it in kernel.
For existing kernels, the low 2M has been reserved by
memblock_reserve(PHYS_OFFSET, 0x200000); maybe it is acceptable to keep
the low 2M behavior.

Yes, we need to use "mem=YM@2M" if the low 2M is reserved.

>
> In kexec/kdump process, we can follows kimage_alloc_control_pages().
> When the boot cpu copy complete the second-kernels, all cpus can jump
> to a kernel-entry-trampoline which is in kernel image. Then we don't
> worry about the code can be destroyed. The kernel-entry-trampoline
> get its cpuid, keep non-boot cpus do as kexec_smp_wait and let boot
> cpu go kernel-entry. In this way we can drop the low-2MB IMO.

It is also feasible to dynamically allocate control pages, but it is
easier to use a low 2M approach. What do you think, Huacai?

>
>> +
>> +    /* All secondary cpus now may jump to kexec_smp_wait cycle */
>> +    relocated_kexec_smp_wait = reboot_code_buffer +
>> +        (void *)(kexec_smp_wait - relocate_new_kernel);
>> +
>> +    return 0;
>> +}
>> +
>> +void machine_kexec_cleanup(struct kimage *kimage)
>> +{
>> +}
>> +
>> +#ifdef CONFIG_SMP
>> +void kexec_reboot(void)
>> +{
>> +    do_kexec_t do_kexec = NULL;
>> +
>> +    /* All secondary cpus go to kexec_smp_wait */
>> +    if (smp_processor_id() > 0) {
>> +        relocated_kexec_smp_wait(NULL);
>> +        unreachable();
>> +    }
>> +
>> +    do_kexec = (void *)reboot_code_buffer;
>> +    do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
>> +
>> +    unreachable();
>> +}
>> +
>> +static void kexec_shutdown_secondary(void *)
>> +{
>> +    local_irq_disable();
>> +    while (!atomic_read(&kexec_ready_to_reboot))
>> +        cpu_relax();
>> +
>> +    kexec_reboot();
>> +}
>> +
>> +void machine_crash_shutdown(struct pt_regs *regs)
>> +{
>> +}
>> +#endif
>> +
>> +void machine_shutdown(void)
>> +{
>> +    smp_call_function(kexec_shutdown_secondary, NULL, 0);
>> +}
>> +
>> +void machine_kexec(struct kimage *image)
>> +{
>> +    unsigned long entry;
>> +    unsigned long *ptr;
>> +    struct kimage_arch *internal = &image->arch;
>> +
>> +    boot_flag = internal->boot_flag;
>> +    fdt_addr = internal->fdt_addr;
>> +
>> +    jump_addr = (unsigned long)phys_to_virt(image->start);
>> +
>> +    first_ind_entry = (unsigned long)phys_to_virt(image->head &
>> PAGE_MASK);
>> +
>> +    /*
>> +     * The generic kexec code builds a page list with physical
>> +     * addresses. they are directly accessible through XKPRANGE
>> +     * hence the phys_to_virt() call.
>> +     */
>> +    for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
>> +         ptr = (entry & IND_INDIRECTION) ?
>> +           phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
>> +        if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
>> +            *ptr & IND_DESTINATION)
>> +            *ptr = (unsigned long) phys_to_virt(*ptr);
>> +    }
>> +
>> +    /* Mark offline before disabling local irq. */
>> +    set_cpu_online(smp_processor_id(), false);
>> +
>> +    /* we do not want to be bothered. */
>> +    local_irq_disable();
>> +
>> +    pr_notice("Will call new kernel at %lx\n", jump_addr);
>> +    pr_notice("FDT image at %lx\n", fdt_addr);
>> +    pr_notice("Bye ...\n");
>> +
>> +    /* Make reboot code buffer available to the boot CPU. */
>> +    flush_cache_all();
>> +
>> +    atomic_set(&kexec_ready_to_reboot, 1);
>> +
>> +    /*
>> +     * We know we were online, and there will be no incoming IPIs at
>> +     * this point.
>> +     */
>> +    set_cpu_online(smp_processor_id(), true);
>> +
>> +    /* Ensure remote CPUs observe that we're online before rebooting. */
>> +    smp_mb__after_atomic();
>> +
>> +    kexec_reboot();
>> +}
>> diff --git a/arch/loongarch/kernel/relocate_kernel.S
>> b/arch/loongarch/kernel/relocate_kernel.S
>> new file mode 100644
>> index 000000000000..d1f242f74ea8
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>> @@ -0,0 +1,125 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * relocate_kernel.S for kexec
>> + *
>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>> + */
>> +
>> +#include <linux/kexec.h>
>> +
>> +#include <asm/asm.h>
>> +#include <asm/asmmacro.h>
>> +#include <asm/regdef.h>
>> +#include <asm/loongarch.h>
>> +#include <asm/stackframe.h>
>> +#include <asm/addrspace.h>
>> +
>> +#define IPI_REG_BASE 0x1fe01000
>> +
>> +SYM_CODE_START(relocate_new_kernel)
>> +    /*
>> +     * s0: Boot flag passed to the new kernel
>> +     * s1: Virt address of the FDT image
>> +     * s2: Pointer to the current entry
>> +     * s3: Virt address to jump to after relocation
>> +     */
>> +    move        s0, a0
>> +    move        s1, a1
>> +    move        s2, a2
>> +    move        s3, a3
>> +
>> +process_entry:
>> +    PTR_L        s4, s2, 0
>> +    PTR_ADDI    s2, s2, SZREG
>> +
>> +    /* destination page */
>> +    andi        s5, s4, IND_DESTINATION
>> +    beqz        s5, 1f
>> +    li.w        t0, ~0x1
>> +    and        s6, s4, t0    /* store destination addr in s6 */
>> +    b        process_entry
>> +
>> +1:
>> +    /* indirection page, update s2    */
>> +    andi        s5, s4, IND_INDIRECTION
>> +    beqz        s5, 1f
>> +    li.w        t0, ~0x2
>> +    and        s2, s4, t0
>> +    b        process_entry
>> +
>> +1:
>> +    /* done page */
>> +    andi        s5, s4, IND_DONE
>> +    beqz        s5, 1f
>> +    b        done
>> +1:
>> +    /* source page */
>> +    andi        s5, s4, IND_SOURCE
>> +    beqz        s5, process_entry
>> +    li.w        t0, ~0x8
>> +    and        s4, s4, t0
>> +    li.w        s8, (1 << _PAGE_SHIFT) / SZREG
>> +
>> +copy_word:
>> +    /* copy page word by word */
>> +    REG_L        s7, s4, 0
>> +    REG_S        s7, s6, 0
>> +    PTR_ADDI    s6, s6, SZREG
>> +    PTR_ADDI    s4, s4, SZREG
>> +    LONG_ADDI    s8, s8, -1
>> +    beqz        s8, process_entry
>> +    b        copy_word
>> +    b        process_entry
>> +
>> +done:
>> +    dbar        0
> ibar, too?

Will add ibar 0.

>> +
>> +    move        a0, s0
>> +    move        a1, s1
>> +    /* jump to the new kernel */
>> +    jr        s3
>> +SYM_CODE_END(relocate_new_kernel)
>> +
>> +#ifdef CONFIG_SMP
>> +/*
>> + * Other CPUs should wait until code is relocated and
>> + * then start at entry (?) point.
>> + */
>> +SYM_CODE_START(kexec_smp_wait)
>> +    li.d        t0, IPI_REG_BASE
>> +    li.d        t1, UNCACHE_BASE
>> +    or        t0, t0, t1
>> +
>> +    /*
>> +     * s1:initfn
>> +     * t0:base t1:cpuid t2:node t3:core t4:count
>> +     */
>> +    csrrd        t1, LOONGARCH_CSR_CPUID
>> +    andi        t1, t1, CSR_CPUID_COREID
>> +    andi        t3, t1, 0x3
>> +    slli.w        t3, t3, 8              /* get core id */
>> +    or        t0, t0, t3
>> +    andi        t2, t1, 0x3c
>> +    slli.d        t2, t2, 42             /* get node id */
>> +    or        t0, t0, t2
>> +
>> +1:    li.w        t4, 0x100              /* wait for init loop */
>> +2:    addi.w        t4, t4, -1             /* limit mailbox access */
>> +    bnez        t4, 2b
>> +    ld.w        s1, t0, 0x20           /* check PC as an indicator */
> Can we do this with iocsr*?

OK, I will consider implementing it the iocsr way.

Thanks,
Youling
>
> Thanks,
> Jinyang
>> +    beqz        s1, 1b
>> +    ld.d        s1, t0, 0x20           /* get PC via mailbox */
>> +    ld.d        sp, t0, 0x28           /* get SP via mailbox */
>> +    ld.d        tp, t0, 0x30           /* get TP via mailbox */
>> +
>> +    li.d        t0, CACHE_BASE
>> +    or        s1, s1, t0
>> +    jr        s1                     /* jump to initial PC */
>> +SYM_CODE_END(kexec_smp_wait)
>> +#endif
>> +
>> +relocate_new_kernel_end:
>> +
>> +SYM_DATA_START(relocate_new_kernel_size)
>> +    PTR        relocate_new_kernel_end - relocate_new_kernel
>> +SYM_DATA_END(relocate_new_kernel_size)
>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] LoongArch: Add kexec support
  2022-08-30  3:25     ` Youling Tang
@ 2022-08-30  3:42       ` Huacai Chen
  2022-08-30  4:07         ` Jinyang He
  2022-08-31  3:38       ` Youling Tang
  1 sibling, 1 reply; 20+ messages in thread
From: Huacai Chen @ 2022-08-30  3:42 UTC (permalink / raw)
  To: Youling Tang
  Cc: Jinyang He, Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal,
	Dave Young, Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

On Tue, Aug 30, 2022 at 11:26 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Jinyang
>
> On 08/30/2022 09:53 AM, Jinyang He wrote:
> > Hi, Youling,
> >
> >
> > On 08/29/2022 12:37 PM, Youling Tang wrote:
> >> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to
> >> the
> >> LoongArch architecture that add support for the kexec re-boot mechanism
> >> (CONFIG_KEXEC) on LoongArch platforms.
> >>
> >> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
> >> PE format.
> >>
> >> I tested this on  LoongArch 3A5000 machine and works as expected,
> >>
> >>   $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
> >>   $ sudo kexec -e
> >>
> >> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >> ---
> >>   arch/loongarch/Kconfig                  |  11 ++
> >>   arch/loongarch/include/asm/kexec.h      |  58 ++++++++
> >>   arch/loongarch/kernel/Makefile          |   2 +
> >>   arch/loongarch/kernel/head.S            |   7 +-
> >>   arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
> >>   arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
> >>   6 files changed, 380 insertions(+), 1 deletion(-)
> >>   create mode 100644 arch/loongarch/include/asm/kexec.h
> >>   create mode 100644 arch/loongarch/kernel/machine_kexec.c
> >>   create mode 100644 arch/loongarch/kernel/relocate_kernel.S
> >>
> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >> index 45364cffc793..903c82fa958d 100644
> >> --- a/arch/loongarch/Kconfig
> >> +++ b/arch/loongarch/Kconfig
> >> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
> >>         The page size is not necessarily 4KB.  Keep this in mind
> >>         when choosing a value for this option.
> >>   +config KEXEC
> >> +    bool "Kexec system call"
> >> +    select KEXEC_CORE
> >> +    help
> >> +      kexec is a system call that implements the ability to shutdown
> >> your
> >> +      current kernel, and to start another kernel.  It is like a reboot
> >> +      but it is independent of the system firmware.   And like a reboot
> >> +      you can start any kernel with it, not just Linux.
> >> +
> >> +      The name comes from the similarity to the exec system call.
> >> +
> >>   config SECCOMP
> >>       bool "Enable seccomp to safely compute untrusted bytecode"
> >>       depends on PROC_FS
> >> diff --git a/arch/loongarch/include/asm/kexec.h
> >> b/arch/loongarch/include/asm/kexec.h
> >> new file mode 100644
> >> index 000000000000..5c9e7b5eccb8
> >> --- /dev/null
> >> +++ b/arch/loongarch/include/asm/kexec.h
> >> @@ -0,0 +1,58 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * kexec.h for kexec
> >> + *
> >> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >> + */
> >> +
> >> +#ifndef _ASM_KEXEC_H
> >> +#define _ASM_KEXEC_H
> >> +
> >> +#include <asm/stacktrace.h>
> >> +#include <asm/page.h>
> >> +
> >> +/* Maximum physical address we can use pages from */
> >> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> >> +/* Maximum address we can reach in physical address mode */
> >> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> >> + /* Maximum address we can use for the control code buffer */
> >> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> >> +
> >> +/* Reserve a page for the control code buffer */
> >> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
> >> +
> >> +/* The native architecture */
> >> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
> >> +
> >> +static inline void crash_setup_regs(struct pt_regs *newregs,
> >> +                    struct pt_regs *oldregs)
> >> +{
> >> +    if (oldregs)
> >> +        memcpy(newregs, oldregs, sizeof(*newregs));
> >> +    else
> >> +        prepare_frametrace(newregs);
> >> +}
> >> +
> >> +#define ARCH_HAS_KIMAGE_ARCH
> >> +
> >> +struct kimage_arch {
> >> +    unsigned long boot_flag;
> >> +    unsigned long fdt_addr;
> >> +};
> >> +
> >> +typedef void (*do_kexec_t)(unsigned long boot_flag,
> >> +               unsigned long fdt_addr,
> >> +               unsigned long first_ind_entry,
> >> +               unsigned long jump_addr);
> >> +
> >> +struct kimage;
> >> +extern const unsigned char relocate_new_kernel[];
> >> +extern const size_t relocate_new_kernel_size;
> >> +
> >> +#ifdef CONFIG_SMP
> >> +extern atomic_t kexec_ready_to_reboot;
> >> +extern const unsigned char kexec_smp_wait[];
> >> +extern void kexec_reboot(void);
> >> +#endif
> >> +
> >> +#endif /* !_ASM_KEXEC_H */
> >> diff --git a/arch/loongarch/kernel/Makefile
> >> b/arch/loongarch/kernel/Makefile
> >> index a213e994db68..20b64ac3f128 100644
> >> --- a/arch/loongarch/kernel/Makefile
> >> +++ b/arch/loongarch/kernel/Makefile
> >> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)    += fpu.o
> >>   obj-$(CONFIG_MODULES)        += module.o module-sections.o
> >>   obj-$(CONFIG_STACKTRACE)    += stacktrace.o
> >>   +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >> +
> >>   obj-$(CONFIG_PROC_FS)        += proc.o
> >>     obj-$(CONFIG_SMP)        += smp.o
> >> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
> >> index 01bac62a6442..22bdf4928325 100644
> >> --- a/arch/loongarch/kernel/head.S
> >> +++ b/arch/loongarch/kernel/head.S
> >> @@ -20,7 +20,12 @@
> >>     _head:
> >>       .word    MZ_MAGIC        /* "MZ", MS-DOS header */
> >> -    .org    0x3c            /* 0x04 ~ 0x3b reserved */
> >> +    .org    0x8
> >> +    .quad    0            /* Image load offset from start of RAM */
> >> +    .dword    _end - _text        /* Effective size of kernel image */
> >> +    .quad    0
> >> +    .dword    kernel_entry        /* Kernel entry point */
> >> +    .org    0x3c            /* 0x28 ~ 0x3b reserved */
> >>       .long    pe_header - _head    /* Offset to the PE header */
> >>     pe_header:
> >> diff --git a/arch/loongarch/kernel/machine_kexec.c
> >> b/arch/loongarch/kernel/machine_kexec.c
> >> new file mode 100644
> >> index 000000000000..4ffcd4cd9c8c
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/machine_kexec.c
> >> @@ -0,0 +1,178 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/*
> >> + * machine_kexec.c for kexec
> >> + *
> >> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >> + */
> >> +#include <linux/compiler.h>
> >> +#include <linux/cpu.h>
> >> +#include <linux/kexec.h>
> >> +#include <linux/mm.h>
> >> +#include <linux/delay.h>
> >> +#include <linux/libfdt.h>
> >> +#include <linux/of_fdt.h>
> >> +
> >> +#include <asm/bootinfo.h>
> >> +#include <asm/cacheflush.h>
> >> +#include <asm/page.h>
> >> +
> >> +/* 0x100000 ~ 0x200000 is safe */
> >> +#define KEXEC_CTRL_CODE    TO_CACHE(0x100000UL)
> >> +#define KEXEC_BLOB_ADDR    TO_CACHE(0x108000UL)
> >> +
> >> +static unsigned long reboot_code_buffer;
> >> +#ifdef CONFIG_SMP
> >> +void (*relocated_kexec_smp_wait)(void *);
> >> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
> >> +#endif
> >> +
> >> +static unsigned long jump_addr;
> >> +static unsigned long first_ind_entry;
> >> +static unsigned long boot_flag;
> >> +static unsigned long fdt_addr;
> >> +
> >> +static void kexec_image_info(const struct kimage *kimage)
> >> +{
> >> +    unsigned long i;
> >> +
> >> +    pr_debug("kexec kimage info:\n");
> >> +    pr_debug("\ttype:        %d\n", kimage->type);
> >> +    pr_debug("\tstart:       %lx\n", kimage->start);
> >> +    pr_debug("\thead:        %lx\n", kimage->head);
> >> +    pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
> >> +
> >> +    for (i = 0; i < kimage->nr_segments; i++) {
> >> +        pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
> >> +            kimage->segment[i].mem,
> >> +            kimage->segment[i].mem + kimage->segment[i].memsz);
> >> +        pr_debug("\t\t0x%lx bytes, %lu pages\n",
> >> +            (unsigned long)kimage->segment[i].memsz,
> >> +            (unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
> >> +    }
> >> +}
> >> +
> >> +int machine_kexec_prepare(struct kimage *kimage)
> >> +{
> >> +    int i;
> >> +    void *dtb = (void *)KEXEC_BLOB_ADDR;
> >> +
> >> +    kexec_image_info(kimage);
> >> +
> >> +    /* Find the Flattened Device Tree */
> >> +    for (i = 0; i < kimage->nr_segments; i++) {
> >> +        if (!fdt_check_header(kimage->segment[i].buf)) {
> >> +            memcpy(dtb, kimage->segment[i].buf, SZ_64K);
> >> +            kimage->arch.boot_flag = fw_arg0;
> >> +            kimage->arch.fdt_addr = (unsigned long) dtb;
> >> +            break;
> >> +        }
> >> +        continue;
> >> +    }
> >> +
> >> +    /* kexec need a safe page to save reboot_code_buffer */
> >> +    kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >> +
> >> +    reboot_code_buffer =
> >> +      (unsigned long)page_address(kimage->control_code_page);
> >> +    memcpy((void *)reboot_code_buffer, relocate_new_kernel,
> >> +           relocate_new_kernel_size);
> > It copies the same content to KEXEC_CTRL_CODE each time; could we do this at
> > boot time?
> I think it's possible to have the copy action happen at boot-time or
> during the prepare phase. (RISCV in prepare, MIPS in boot-time)
>
> >
> > BTW, our system always keeps the low 2MB unused, on mips-loongson or
> > LoongArch. Is that necessary on LoongArch? We cannot use the parameter
> > 'mem=YYM' normally, but 'mem=YYM@2M' is ok. And the low 2MB is not
> > in virtual memory management, although we can access it in the kernel.
> For existing kernels, the low 2M has been reserved by
> memblock_reserve(PHYS_OFFSET, 0x200000), maybe it is acceptable to keep
> the low 2M behavior.
>
> Yes, we need to use "mem=YM@2M" if the low 2M is reserved.
>
> >
> > In the kexec/kdump process, we can follow kimage_alloc_control_pages().
> > Once the boot cpu has finished copying the second kernel, all cpus can jump
> > to a kernel-entry-trampoline which is in the kernel image. Then we don't
> > need to worry about the code being destroyed. The kernel-entry-trampoline
> > gets its cpuid, keeps non-boot cpus doing what kexec_smp_wait does, and lets
> > the boot cpu go to kernel-entry. In this way we can drop the low 2MB, IMO.
>
> It is also feasible to dynamically allocate control pages, but it is
> easier to use a low 2M approach. What do you think, Huacai?
I prefer to use the low 2MB.

Huacai
>
> >
> >> +
> >> +    /* All secondary cpus now may jump to kexec_smp_wait cycle */
> >> +    relocated_kexec_smp_wait = reboot_code_buffer +
> >> +        (void *)(kexec_smp_wait - relocate_new_kernel);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +void machine_kexec_cleanup(struct kimage *kimage)
> >> +{
> >> +}
> >> +
> >> +#ifdef CONFIG_SMP
> >> +void kexec_reboot(void)
> >> +{
> >> +    do_kexec_t do_kexec = NULL;
> >> +
> >> +    /* All secondary cpus go to kexec_smp_wait */
> >> +    if (smp_processor_id() > 0) {
> >> +        relocated_kexec_smp_wait(NULL);
> >> +        unreachable();
> >> +    }
> >> +
> >> +    do_kexec = (void *)reboot_code_buffer;
> >> +    do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
> >> +
> >> +    unreachable();
> >> +}
> >> +
> >> +static void kexec_shutdown_secondary(void *)
> >> +{
> >> +    local_irq_disable();
> >> +    while (!atomic_read(&kexec_ready_to_reboot))
> >> +        cpu_relax();
> >> +
> >> +    kexec_reboot();
> >> +}
> >> +
> >> +void machine_crash_shutdown(struct pt_regs *regs)
> >> +{
> >> +}
> >> +#endif
> >> +
> >> +void machine_shutdown(void)
> >> +{
> >> +    smp_call_function(kexec_shutdown_secondary, NULL, 0);
> >> +}
> >> +
> >> +void machine_kexec(struct kimage *image)
> >> +{
> >> +    unsigned long entry;
> >> +    unsigned long *ptr;
> >> +    struct kimage_arch *internal = &image->arch;
> >> +
> >> +    boot_flag = internal->boot_flag;
> >> +    fdt_addr = internal->fdt_addr;
> >> +
> >> +    jump_addr = (unsigned long)phys_to_virt(image->start);
> >> +
> >> +    first_ind_entry = (unsigned long)phys_to_virt(image->head &
> >> PAGE_MASK);
> >> +
> >> +    /*
> >> +     * The generic kexec code builds a page list with physical
> >> +     * addresses. they are directly accessible through XKPRANGE
> >> +     * hence the phys_to_virt() call.
> >> +     */
> >> +    for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
> >> +         ptr = (entry & IND_INDIRECTION) ?
> >> +           phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
> >> +        if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
> >> +            *ptr & IND_DESTINATION)
> >> +            *ptr = (unsigned long) phys_to_virt(*ptr);
> >> +    }
> >> +
> >> +    /* Mark offline before disabling local irq. */
> >> +    set_cpu_online(smp_processor_id(), false);
> >> +
> >> +    /* we do not want to be bothered. */
> >> +    local_irq_disable();
> >> +
> >> +    pr_notice("Will call new kernel at %lx\n", jump_addr);
> >> +    pr_notice("FDT image at %lx\n", fdt_addr);
> >> +    pr_notice("Bye ...\n");
> >> +
> >> +    /* Make reboot code buffer available to the boot CPU. */
> >> +    flush_cache_all();
> >> +
> >> +    atomic_set(&kexec_ready_to_reboot, 1);
> >> +
> >> +    /*
> >> +     * We know we were online, and there will be no incoming IPIs at
> >> +     * this point.
> >> +     */
> >> +    set_cpu_online(smp_processor_id(), true);
> >> +
> >> +    /* Ensure remote CPUs observe that we're online before rebooting. */
> >> +    smp_mb__after_atomic();
> >> +
> >> +    kexec_reboot();
> >> +}
> >> diff --git a/arch/loongarch/kernel/relocate_kernel.S
> >> b/arch/loongarch/kernel/relocate_kernel.S
> >> new file mode 100644
> >> index 000000000000..d1f242f74ea8
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >> @@ -0,0 +1,125 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * relocate_kernel.S for kexec
> >> + *
> >> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >> + */
> >> +
> >> +#include <linux/kexec.h>
> >> +
> >> +#include <asm/asm.h>
> >> +#include <asm/asmmacro.h>
> >> +#include <asm/regdef.h>
> >> +#include <asm/loongarch.h>
> >> +#include <asm/stackframe.h>
> >> +#include <asm/addrspace.h>
> >> +
> >> +#define IPI_REG_BASE 0x1fe01000
> >> +
> >> +SYM_CODE_START(relocate_new_kernel)
> >> +    /*
> >> +     * s0: Boot flag passed to the new kernel
> >> +     * s1: Virt address of the FDT image
> >> +     * s2: Pointer to the current entry
> >> +     * s3: Virt address to jump to after relocation
> >> +     */
> >> +    move        s0, a0
> >> +    move        s1, a1
> >> +    move        s2, a2
> >> +    move        s3, a3
> >> +
> >> +process_entry:
> >> +    PTR_L        s4, s2, 0
> >> +    PTR_ADDI    s2, s2, SZREG
> >> +
> >> +    /* destination page */
> >> +    andi        s5, s4, IND_DESTINATION
> >> +    beqz        s5, 1f
> >> +    li.w        t0, ~0x1
> >> +    and        s6, s4, t0    /* store destination addr in s6 */
> >> +    b        process_entry
> >> +
> >> +1:
> >> +    /* indirection page, update s2    */
> >> +    andi        s5, s4, IND_INDIRECTION
> >> +    beqz        s5, 1f
> >> +    li.w        t0, ~0x2
> >> +    and        s2, s4, t0
> >> +    b        process_entry
> >> +
> >> +1:
> >> +    /* done page */
> >> +    andi        s5, s4, IND_DONE
> >> +    beqz        s5, 1f
> >> +    b        done
> >> +1:
> >> +    /* source page */
> >> +    andi        s5, s4, IND_SOURCE
> >> +    beqz        s5, process_entry
> >> +    li.w        t0, ~0x8
> >> +    and        s4, s4, t0
> >> +    li.w        s8, (1 << _PAGE_SHIFT) / SZREG
> >> +
> >> +copy_word:
> >> +    /* copy page word by word */
> >> +    REG_L        s7, s4, 0
> >> +    REG_S        s7, s6, 0
> >> +    PTR_ADDI    s6, s6, SZREG
> >> +    PTR_ADDI    s4, s4, SZREG
> >> +    LONG_ADDI    s8, s8, -1
> >> +    beqz        s8, process_entry
> >> +    b        copy_word
> >> +    b        process_entry
> >> +
> >> +done:
> >> +    dbar        0
> > ibar, too?
>
> Will add ibar 0.
>
> >> +
> >> +    move        a0, s0
> >> +    move        a1, s1
> >> +    /* jump to the new kernel */
> >> +    jr        s3
> >> +SYM_CODE_END(relocate_new_kernel)
> >> +
> >> +#ifdef CONFIG_SMP
> >> +/*
> >> + * Other CPUs should wait until code is relocated and
> >> + * then start at entry (?) point.
> >> + */
> >> +SYM_CODE_START(kexec_smp_wait)
> >> +    li.d        t0, IPI_REG_BASE
> >> +    li.d        t1, UNCACHE_BASE
> >> +    or        t0, t0, t1
> >> +
> >> +    /*
> >> +     * s1:initfn
> >> +     * t0:base t1:cpuid t2:node t3:core t4:count
> >> +     */
> >> +    csrrd        t1, LOONGARCH_CSR_CPUID
> >> +    andi        t1, t1, CSR_CPUID_COREID
> >> +    andi        t3, t1, 0x3
> >> +    slli.w        t3, t3, 8              /* get core id */
> >> +    or        t0, t0, t3
> >> +    andi        t2, t1, 0x3c
> >> +    slli.d        t2, t2, 42             /* get node id */
> >> +    or        t0, t0, t2
> >> +
> >> +1:    li.w        t4, 0x100              /* wait for init loop */
> >> +2:    addi.w        t4, t4, -1             /* limit mailbox access */
> >> +    bnez        t4, 2b
> >> +    ld.w        s1, t0, 0x20           /* check PC as an indicator */
> > Can we do this with iocsr*?
>
> OK, I will consider the implementation in the iocsr way.
>
> Thanks,
> Youling
> >
> > Thanks,
> > Jinyang
> >> +    beqz        s1, 1b
> >> +    ld.d        s1, t0, 0x20           /* get PC via mailbox */
> >> +    ld.d        sp, t0, 0x28           /* get SP via mailbox */
> >> +    ld.d        tp, t0, 0x30           /* get TP via mailbox */
> >> +
> >> +    li.d        t0, CACHE_BASE
> >> +    or        s1, s1, t0
> >> +    jr        s1                     /* jump to initial PC */
> >> +SYM_CODE_END(kexec_smp_wait)
> >> +#endif
> >> +
> >> +relocate_new_kernel_end:
> >> +
> >> +SYM_DATA_START(relocate_new_kernel_size)
> >> +    PTR        relocate_new_kernel_end - relocate_new_kernel
> >> +SYM_DATA_END(relocate_new_kernel_size)
> >
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] LoongArch: Add kexec support
  2022-08-30  3:42       ` Huacai Chen
@ 2022-08-30  4:07         ` Jinyang He
  0 siblings, 0 replies; 20+ messages in thread
From: Jinyang He @ 2022-08-30  4:07 UTC (permalink / raw)
  To: Huacai Chen, Youling Tang
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

On 08/30/2022 11:42 AM, Huacai Chen wrote:

> On Tue, Aug 30, 2022 at 11:26 AM Youling Tang <tangyouling@loongson.cn> wrote:
>> Hi, Jinyang
>>
>> On 08/30/2022 09:53 AM, Jinyang He wrote:
>>> Hi, Youling,
>>>
>>>
>>> On 08/29/2022 12:37 PM, Youling Tang wrote:
>>>> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to
>>>> the
>>>> LoongArch architecture that add support for the kexec re-boot mechanism
>>>> (CONFIG_KEXEC) on LoongArch platforms.
>>>>
>>>> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
>>>> PE format.
>>>>
>>>> I tested this on  LoongArch 3A5000 machine and works as expected,
>>>>
>>>>    $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
>>>>    $ sudo kexec -e
>>>>
>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>> ---
>>>>    arch/loongarch/Kconfig                  |  11 ++
>>>>    arch/loongarch/include/asm/kexec.h      |  58 ++++++++
>>>>    arch/loongarch/kernel/Makefile          |   2 +
>>>>    arch/loongarch/kernel/head.S            |   7 +-
>>>>    arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
>>>>    arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
>>>>    6 files changed, 380 insertions(+), 1 deletion(-)
>>>>    create mode 100644 arch/loongarch/include/asm/kexec.h
>>>>    create mode 100644 arch/loongarch/kernel/machine_kexec.c
>>>>    create mode 100644 arch/loongarch/kernel/relocate_kernel.S
>>>>
>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>> index 45364cffc793..903c82fa958d 100644
>>>> --- a/arch/loongarch/Kconfig
>>>> +++ b/arch/loongarch/Kconfig
>>>> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
>>>>          The page size is not necessarily 4KB.  Keep this in mind
>>>>          when choosing a value for this option.
>>>>    +config KEXEC
>>>> +    bool "Kexec system call"
>>>> +    select KEXEC_CORE
>>>> +    help
>>>> +      kexec is a system call that implements the ability to shutdown
>>>> your
>>>> +      current kernel, and to start another kernel.  It is like a reboot
>>>> +      but it is independent of the system firmware.   And like a reboot
>>>> +      you can start any kernel with it, not just Linux.
>>>> +
>>>> +      The name comes from the similarity to the exec system call.
>>>> +
>>>>    config SECCOMP
>>>>        bool "Enable seccomp to safely compute untrusted bytecode"
>>>>        depends on PROC_FS
>>>> diff --git a/arch/loongarch/include/asm/kexec.h
>>>> b/arch/loongarch/include/asm/kexec.h
>>>> new file mode 100644
>>>> index 000000000000..5c9e7b5eccb8
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/include/asm/kexec.h
>>>> @@ -0,0 +1,58 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * kexec.h for kexec
>>>> + *
>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>> + */
>>>> +
>>>> +#ifndef _ASM_KEXEC_H
>>>> +#define _ASM_KEXEC_H
>>>> +
>>>> +#include <asm/stacktrace.h>
>>>> +#include <asm/page.h>
>>>> +
>>>> +/* Maximum physical address we can use pages from */
>>>> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
>>>> +/* Maximum address we can reach in physical address mode */
>>>> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
>>>> + /* Maximum address we can use for the control code buffer */
>>>> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
>>>> +
>>>> +/* Reserve a page for the control code buffer */
>>>> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
>>>> +
>>>> +/* The native architecture */
>>>> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
>>>> +
>>>> +static inline void crash_setup_regs(struct pt_regs *newregs,
>>>> +                    struct pt_regs *oldregs)
>>>> +{
>>>> +    if (oldregs)
>>>> +        memcpy(newregs, oldregs, sizeof(*newregs));
>>>> +    else
>>>> +        prepare_frametrace(newregs);
>>>> +}
>>>> +
>>>> +#define ARCH_HAS_KIMAGE_ARCH
>>>> +
>>>> +struct kimage_arch {
>>>> +    unsigned long boot_flag;
>>>> +    unsigned long fdt_addr;
>>>> +};
>>>> +
>>>> +typedef void (*do_kexec_t)(unsigned long boot_flag,
>>>> +               unsigned long fdt_addr,
>>>> +               unsigned long first_ind_entry,
>>>> +               unsigned long jump_addr);
>>>> +
>>>> +struct kimage;
>>>> +extern const unsigned char relocate_new_kernel[];
>>>> +extern const size_t relocate_new_kernel_size;
>>>> +
>>>> +#ifdef CONFIG_SMP
>>>> +extern atomic_t kexec_ready_to_reboot;
>>>> +extern const unsigned char kexec_smp_wait[];
>>>> +extern void kexec_reboot(void);
>>>> +#endif
>>>> +
>>>> +#endif /* !_ASM_KEXEC_H */
>>>> diff --git a/arch/loongarch/kernel/Makefile
>>>> b/arch/loongarch/kernel/Makefile
>>>> index a213e994db68..20b64ac3f128 100644
>>>> --- a/arch/loongarch/kernel/Makefile
>>>> +++ b/arch/loongarch/kernel/Makefile
>>>> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)    += fpu.o
>>>>    obj-$(CONFIG_MODULES)        += module.o module-sections.o
>>>>    obj-$(CONFIG_STACKTRACE)    += stacktrace.o
>>>>    +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>> +
>>>>    obj-$(CONFIG_PROC_FS)        += proc.o
>>>>      obj-$(CONFIG_SMP)        += smp.o
>>>> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
>>>> index 01bac62a6442..22bdf4928325 100644
>>>> --- a/arch/loongarch/kernel/head.S
>>>> +++ b/arch/loongarch/kernel/head.S
>>>> @@ -20,7 +20,12 @@
>>>>      _head:
>>>>        .word    MZ_MAGIC        /* "MZ", MS-DOS header */
>>>> -    .org    0x3c            /* 0x04 ~ 0x3b reserved */
>>>> +    .org    0x8
>>>> +    .quad    0            /* Image load offset from start of RAM */
>>>> +    .dword    _end - _text        /* Effective size of kernel image */
>>>> +    .quad    0
>>>> +    .dword    kernel_entry        /* Kernel entry point */
>>>> +    .org    0x3c            /* 0x28 ~ 0x3b reserved */
>>>>        .long    pe_header - _head    /* Offset to the PE header */
>>>>      pe_header:
>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c
>>>> b/arch/loongarch/kernel/machine_kexec.c
>>>> new file mode 100644
>>>> index 000000000000..4ffcd4cd9c8c
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>> @@ -0,0 +1,178 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * machine_kexec.c for kexec
>>>> + *
>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>> + */
>>>> +#include <linux/compiler.h>
>>>> +#include <linux/cpu.h>
>>>> +#include <linux/kexec.h>
>>>> +#include <linux/mm.h>
>>>> +#include <linux/delay.h>
>>>> +#include <linux/libfdt.h>
>>>> +#include <linux/of_fdt.h>
>>>> +
>>>> +#include <asm/bootinfo.h>
>>>> +#include <asm/cacheflush.h>
>>>> +#include <asm/page.h>
>>>> +
>>>> +/* 0x100000 ~ 0x200000 is safe */
>>>> +#define KEXEC_CTRL_CODE    TO_CACHE(0x100000UL)
>>>> +#define KEXEC_BLOB_ADDR    TO_CACHE(0x108000UL)
>>>> +
>>>> +static unsigned long reboot_code_buffer;
>>>> +#ifdef CONFIG_SMP
>>>> +void (*relocated_kexec_smp_wait)(void *);
>>>> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
>>>> +#endif
>>>> +
>>>> +static unsigned long jump_addr;
>>>> +static unsigned long first_ind_entry;
>>>> +static unsigned long boot_flag;
>>>> +static unsigned long fdt_addr;
>>>> +
>>>> +static void kexec_image_info(const struct kimage *kimage)
>>>> +{
>>>> +    unsigned long i;
>>>> +
>>>> +    pr_debug("kexec kimage info:\n");
>>>> +    pr_debug("\ttype:        %d\n", kimage->type);
>>>> +    pr_debug("\tstart:       %lx\n", kimage->start);
>>>> +    pr_debug("\thead:        %lx\n", kimage->head);
>>>> +    pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
>>>> +
>>>> +    for (i = 0; i < kimage->nr_segments; i++) {
>>>> +        pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
>>>> +            kimage->segment[i].mem,
>>>> +            kimage->segment[i].mem + kimage->segment[i].memsz);
>>>> +        pr_debug("\t\t0x%lx bytes, %lu pages\n",
>>>> +            (unsigned long)kimage->segment[i].memsz,
>>>> +            (unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
>>>> +    }
>>>> +}
>>>> +
>>>> +int machine_kexec_prepare(struct kimage *kimage)
>>>> +{
>>>> +    int i;
>>>> +    void *dtb = (void *)KEXEC_BLOB_ADDR;
>>>> +
>>>> +    kexec_image_info(kimage);
>>>> +
>>>> +    /* Find the Flattened Device Tree */
>>>> +    for (i = 0; i < kimage->nr_segments; i++) {
>>>> +        if (!fdt_check_header(kimage->segment[i].buf)) {
>>>> +            memcpy(dtb, kimage->segment[i].buf, SZ_64K);
>>>> +            kimage->arch.boot_flag = fw_arg0;
>>>> +            kimage->arch.fdt_addr = (unsigned long) dtb;
>>>> +            break;
>>>> +        }
>>>> +        continue;
>>>> +    }
>>>> +
>>>> +    /* kexec need a safe page to save reboot_code_buffer */
>>>> +    kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>> +
>>>> +    reboot_code_buffer =
>>>> +      (unsigned long)page_address(kimage->control_code_page);
>>>> +    memcpy((void *)reboot_code_buffer, relocate_new_kernel,
>>>> +           relocate_new_kernel_size);
>>> It copies the same content to KEXEC_CTRL_CODE each time; could we do this at
>>> boot time?
>> I think it's possible to have the copy action happen at boot-time or
>> during the prepare phase. (RISCV in prepare, MIPS in boot-time)
>>
>>> BTW, our system always keeps the low 2MB unused, on mips-loongson or
>>> LoongArch. Is that necessary on LoongArch? We cannot use the parameter
>>> 'mem=YYM' normally, but 'mem=YYM@2M' is ok. And the low 2MB is not
>>> in virtual memory management, although we can access it in the kernel.
>> For existing kernels, the low 2M has been reserved by
>> memblock_reserve(PHYS_OFFSET, 0x200000), maybe it is acceptable to keep
>> the low 2M behavior.
>>
>> Yes, we need to use "mem=YM@2M" if the low 2M is reserved.
>>
>>> In the kexec/kdump process, we can follow kimage_alloc_control_pages().
>>> Once the boot cpu has finished copying the second kernel, all cpus can jump
>>> to a kernel-entry-trampoline which is in the kernel image. Then we don't
>>> need to worry about the code being destroyed. The kernel-entry-trampoline
>>> gets its cpuid, keeps non-boot cpus doing what kexec_smp_wait does, and lets
>>> the boot cpu go to kernel-entry. In this way we can drop the low 2MB, IMO.
>> It is also feasible to dynamically allocate control pages, but it is
>> easier to use a low 2M approach. What do you think, Huacai?
> I prefer to use the low 2MB.
>
> Huacai
The low 2MB is also ok for me, as a trick. And should it free
control_code_page here?

>>>> +
>>>> +    /* All secondary cpus now may jump to kexec_smp_wait cycle */
>>>> +    relocated_kexec_smp_wait = reboot_code_buffer +
>>>> +        (void *)(kexec_smp_wait - relocate_new_kernel);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +void machine_kexec_cleanup(struct kimage *kimage)
>>>> +{
>>>> +}
>>>> +
>>>> +#ifdef CONFIG_SMP
>>>> +void kexec_reboot(void)
>>>> +{
>>>> +    do_kexec_t do_kexec = NULL;
>>>> +
>>>> +    /* All secondary cpus go to kexec_smp_wait */
>>>> +    if (smp_processor_id() > 0) {
>>>> +        relocated_kexec_smp_wait(NULL);
>>>> +        unreachable();
>>>> +    }
>>>> +
>>>> +    do_kexec = (void *)reboot_code_buffer;
>>>> +    do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
>>>> +
>>>> +    unreachable();
>>>> +}
>>>> +
>>>> +static void kexec_shutdown_secondary(void *)
>>>> +{
>>>> +    local_irq_disable();
>>>> +    while (!atomic_read(&kexec_ready_to_reboot))
>>>> +        cpu_relax();
>>>> +
>>>> +    kexec_reboot();
>>>> +}
>>>> +
>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>> +{
>>>> +}
>>>> +#endif
>>>> +
>>>> +void machine_shutdown(void)
>>>> +{
>>>> +    smp_call_function(kexec_shutdown_secondary, NULL, 0);
>>>> +}
>>>> +
>>>> +void machine_kexec(struct kimage *image)
>>>> +{
>>>> +    unsigned long entry;
>>>> +    unsigned long *ptr;
>>>> +    struct kimage_arch *internal = &image->arch;
>>>> +
>>>> +    boot_flag = internal->boot_flag;
>>>> +    fdt_addr = internal->fdt_addr;
>>>> +
>>>> +    jump_addr = (unsigned long)phys_to_virt(image->start);
>>>> +
>>>> +    first_ind_entry = (unsigned long)phys_to_virt(image->head &
>>>> PAGE_MASK);
>>>> +
>>>> +    /*
>>>> +     * The generic kexec code builds a page list with physical
>>>> +     * addresses. they are directly accessible through XKPRANGE
>>>> +     * hence the phys_to_virt() call.
>>>> +     */
>>>> +    for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
>>>> +         ptr = (entry & IND_INDIRECTION) ?
>>>> +           phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
>>>> +        if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
>>>> +            *ptr & IND_DESTINATION)
>>>> +            *ptr = (unsigned long) phys_to_virt(*ptr);
>>>> +    }
>>>> +
>>>> +    /* Mark offline before disabling local irq. */
>>>> +    set_cpu_online(smp_processor_id(), false);
>>>> +
>>>> +    /* we do not want to be bothered. */
>>>> +    local_irq_disable();
>>>> +
>>>> +    pr_notice("Will call new kernel at %lx\n", jump_addr);
>>>> +    pr_notice("FDT image at %lx\n", fdt_addr);
>>>> +    pr_notice("Bye ...\n");
>>>> +
>>>> +    /* Make reboot code buffer available to the boot CPU. */
>>>> +    flush_cache_all();
>>>> +
>>>> +    atomic_set(&kexec_ready_to_reboot, 1);
>>>> +
>>>> +    /*
>>>> +     * We know we were online, and there will be no incoming IPIs at
>>>> +     * this point.
>>>> +     */
>>>> +    set_cpu_online(smp_processor_id(), true);
>>>> +
>>>> +    /* Ensure remote CPUs observe that we're online before rebooting. */
>>>> +    smp_mb__after_atomic();
>>>> +
>>>> +    kexec_reboot();
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S
>>>> b/arch/loongarch/kernel/relocate_kernel.S
>>>> new file mode 100644
>>>> index 000000000000..d1f242f74ea8
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>> @@ -0,0 +1,125 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * relocate_kernel.S for kexec
>>>> + *
>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>> + */
>>>> +
>>>> +#include <linux/kexec.h>
>>>> +
>>>> +#include <asm/asm.h>
>>>> +#include <asm/asmmacro.h>
>>>> +#include <asm/regdef.h>
>>>> +#include <asm/loongarch.h>
>>>> +#include <asm/stackframe.h>
>>>> +#include <asm/addrspace.h>
>>>> +
>>>> +#define IPI_REG_BASE 0x1fe01000
>>>> +
>>>> +SYM_CODE_START(relocate_new_kernel)
>>>> +    /*
>>>> +     * s0: Boot flag passed to the new kernel
>>>> +     * s1: Virt address of the FDT image
>>>> +     * s2: Pointer to the current entry
>>>> +     * s3: Virt address to jump to after relocation
>>>> +     */
>>>> +    move        s0, a0
>>>> +    move        s1, a1
>>>> +    move        s2, a2
>>>> +    move        s3, a3
>>>> +
>>>> +process_entry:
>>>> +    PTR_L        s4, s2, 0
>>>> +    PTR_ADDI    s2, s2, SZREG
>>>> +
>>>> +    /* destination page */
>>>> +    andi        s5, s4, IND_DESTINATION
>>>> +    beqz        s5, 1f
>>>> +    li.w        t0, ~0x1
>>>> +    and        s6, s4, t0    /* store destination addr in s6 */
>>>> +    b        process_entry
>>>> +
>>>> +1:
>>>> +    /* indirection page, update s2    */
>>>> +    andi        s5, s4, IND_INDIRECTION
>>>> +    beqz        s5, 1f
>>>> +    li.w        t0, ~0x2
>>>> +    and        s2, s4, t0
>>>> +    b        process_entry
>>>> +
>>>> +1:
>>>> +    /* done page */
>>>> +    andi        s5, s4, IND_DONE
>>>> +    beqz        s5, 1f
>>>> +    b        done
>>>> +1:
>>>> +    /* source page */
>>>> +    andi        s5, s4, IND_SOURCE
>>>> +    beqz        s5, process_entry
>>>> +    li.w        t0, ~0x8
>>>> +    and        s4, s4, t0
>>>> +    li.w        s8, (1 << _PAGE_SHIFT) / SZREG
>>>> +
>>>> +copy_word:
>>>> +    /* copy page word by word */
>>>> +    REG_L        s7, s4, 0
>>>> +    REG_S        s7, s6, 0
>>>> +    PTR_ADDI    s6, s6, SZREG
>>>> +    PTR_ADDI    s4, s4, SZREG
>>>> +    LONG_ADDI    s8, s8, -1
>>>> +    beqz        s8, process_entry
>>>> +    b        copy_word
>>>> +    b        process_entry
>>>> +
>>>> +done:
>>>> +    dbar        0
>>> ibar, too?
>> Will add ibar 0.
>>
>>>> +
>>>> +    move        a0, s0
>>>> +    move        a1, s1
>>>> +    /* jump to the new kernel */
>>>> +    jr        s3
>>>> +SYM_CODE_END(relocate_new_kernel)
>>>> +
>>>> +#ifdef CONFIG_SMP
>>>> +/*
>>>> + * Other CPUs should wait until code is relocated and
>>>> + * then start at entry (?) point.
>>>> + */
>>>> +SYM_CODE_START(kexec_smp_wait)
>>>> +    li.d        t0, IPI_REG_BASE
>>>> +    li.d        t1, UNCACHE_BASE
>>>> +    or        t0, t0, t1
>>>> +
>>>> +    /*
>>>> +     * s1:initfn
>>>> +     * t0:base t1:cpuid t2:node t3:core t4:count
>>>> +     */
>>>> +    csrrd        t1, LOONGARCH_CSR_CPUID
>>>> +    andi        t1, t1, CSR_CPUID_COREID
>>>> +    andi        t3, t1, 0x3
>>>> +    slli.w        t3, t3, 8              /* get core id */
>>>> +    or        t0, t0, t3
>>>> +    andi        t2, t1, 0x3c
>>>> +    slli.d        t2, t2, 42             /* get node id */
>>>> +    or        t0, t0, t2
>>>> +
>>>> +1:    li.w        t4, 0x100              /* wait for init loop */
>>>> +2:    addi.w        t4, t4, -1             /* limit mailbox access */
>>>> +    bnez        t4, 2b
>>>> +    ld.w        s1, t0, 0x20           /* check PC as an indicator */
>>> Can we do this with iocsr*?
>> OK, I will consider the implementation in the iocsr way.
>>
>> Thanks,
>> Youling
>>> Thanks,
>>> Jinyang
>>>> +    beqz        s1, 1b
>>>> +    ld.d        s1, t0, 0x20           /* get PC via mailbox */
>>>> +    ld.d        sp, t0, 0x28           /* get SP via mailbox */
>>>> +    ld.d        tp, t0, 0x30           /* get TP via mailbox */
>>>> +
>>>> +    li.d        t0, CACHE_BASE
>>>> +    or        s1, s1, t0
>>>> +    jr        s1                     /* jump to initial PC */
>>>> +SYM_CODE_END(kexec_smp_wait)
>>>> +#endif
>>>> +
>>>> +relocate_new_kernel_end:
>>>> +
>>>> +SYM_DATA_START(relocate_new_kernel_size)
>>>> +    PTR        relocate_new_kernel_end - relocate_new_kernel
>>>> +SYM_DATA_END(relocate_new_kernel_size)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] LoongArch: Add kexec support
  2022-08-30  3:25     ` Youling Tang
  2022-08-30  3:42       ` Huacai Chen
@ 2022-08-31  3:38       ` Youling Tang
  1 sibling, 0 replies; 20+ messages in thread
From: Youling Tang @ 2022-08-31  3:38 UTC (permalink / raw)
  To: Jinyang He, Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, linux-kernel



On 08/30/2022 11:25 AM, Youling Tang wrote:
> Hi, Jinyang
>
> On 08/30/2022 09:53 AM, Jinyang He wrote:
>> Hi, Youling,
>>
>>
>> On 08/29/2022 12:37 PM, Youling Tang wrote:
>>> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S, to
>>> the LoongArch architecture. They add support for the kexec re-boot
>>> mechanism (CONFIG_KEXEC) on LoongArch platforms.
>>>
>>> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
>>> PE format.
>>>
>>> I tested this on a LoongArch 3A5000 machine and it works as expected:
>>>
>>>   $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
>>>   $ sudo kexec -e
>>>
>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>> ---
>>>   arch/loongarch/Kconfig                  |  11 ++
>>>   arch/loongarch/include/asm/kexec.h      |  58 ++++++++
>>>   arch/loongarch/kernel/Makefile          |   2 +
>>>   arch/loongarch/kernel/head.S            |   7 +-
>>>   arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
>>>   arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
>>>   6 files changed, 380 insertions(+), 1 deletion(-)
>>>   create mode 100644 arch/loongarch/include/asm/kexec.h
>>>   create mode 100644 arch/loongarch/kernel/machine_kexec.c
>>>   create mode 100644 arch/loongarch/kernel/relocate_kernel.S
>>>
>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>> index 45364cffc793..903c82fa958d 100644
>>> --- a/arch/loongarch/Kconfig
>>> +++ b/arch/loongarch/Kconfig
>>> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
>>>         The page size is not necessarily 4KB.  Keep this in mind
>>>         when choosing a value for this option.
>>>   +config KEXEC
>>> +    bool "Kexec system call"
>>> +    select KEXEC_CORE
>>> +    help
>>> +      kexec is a system call that implements the ability to shutdown
>>> your
>>> +      current kernel, and to start another kernel.  It is like a reboot
>>> +      but it is independent of the system firmware.   And like a reboot
>>> +      you can start any kernel with it, not just Linux.
>>> +
>>> +      The name comes from the similarity to the exec system call.
>>> +
>>>   config SECCOMP
>>>       bool "Enable seccomp to safely compute untrusted bytecode"
>>>       depends on PROC_FS
>>> diff --git a/arch/loongarch/include/asm/kexec.h
>>> b/arch/loongarch/include/asm/kexec.h
>>> new file mode 100644
>>> index 000000000000..5c9e7b5eccb8
>>> --- /dev/null
>>> +++ b/arch/loongarch/include/asm/kexec.h
>>> @@ -0,0 +1,58 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * kexec.h for kexec
>>> + *
>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>> + */
>>> +
>>> +#ifndef _ASM_KEXEC_H
>>> +#define _ASM_KEXEC_H
>>> +
>>> +#include <asm/stacktrace.h>
>>> +#include <asm/page.h>
>>> +
>>> +/* Maximum physical address we can use pages from */
>>> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
>>> +/* Maximum address we can reach in physical address mode */
>>> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
>>> + /* Maximum address we can use for the control code buffer */
>>> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
>>> +
>>> +/* Reserve a page for the control code buffer */
>>> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
>>> +
>>> +/* The native architecture */
>>> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
>>> +
>>> +static inline void crash_setup_regs(struct pt_regs *newregs,
>>> +                    struct pt_regs *oldregs)
>>> +{
>>> +    if (oldregs)
>>> +        memcpy(newregs, oldregs, sizeof(*newregs));
>>> +    else
>>> +        prepare_frametrace(newregs);
>>> +}
>>> +
>>> +#define ARCH_HAS_KIMAGE_ARCH
>>> +
>>> +struct kimage_arch {
>>> +    unsigned long boot_flag;
>>> +    unsigned long fdt_addr;
>>> +};
>>> +
>>> +typedef void (*do_kexec_t)(unsigned long boot_flag,
>>> +               unsigned long fdt_addr,
>>> +               unsigned long first_ind_entry,
>>> +               unsigned long jump_addr);
>>> +
>>> +struct kimage;
>>> +extern const unsigned char relocate_new_kernel[];
>>> +extern const size_t relocate_new_kernel_size;
>>> +
>>> +#ifdef CONFIG_SMP
>>> +extern atomic_t kexec_ready_to_reboot;
>>> +extern const unsigned char kexec_smp_wait[];
>>> +extern void kexec_reboot(void);
>>> +#endif
>>> +
>>> +#endif /* !_ASM_KEXEC_H */
>>> diff --git a/arch/loongarch/kernel/Makefile
>>> b/arch/loongarch/kernel/Makefile
>>> index a213e994db68..20b64ac3f128 100644
>>> --- a/arch/loongarch/kernel/Makefile
>>> +++ b/arch/loongarch/kernel/Makefile
>>> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)    += fpu.o
>>>   obj-$(CONFIG_MODULES)        += module.o module-sections.o
>>>   obj-$(CONFIG_STACKTRACE)    += stacktrace.o
>>>   +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>> +
>>>   obj-$(CONFIG_PROC_FS)        += proc.o
>>>     obj-$(CONFIG_SMP)        += smp.o
>>> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
>>> index 01bac62a6442..22bdf4928325 100644
>>> --- a/arch/loongarch/kernel/head.S
>>> +++ b/arch/loongarch/kernel/head.S
>>> @@ -20,7 +20,12 @@
>>>     _head:
>>>       .word    MZ_MAGIC        /* "MZ", MS-DOS header */
>>> -    .org    0x3c            /* 0x04 ~ 0x3b reserved */
>>> +    .org    0x8
>>> +    .quad    0            /* Image load offset from start of RAM */
>>> +    .dword    _end - _text        /* Effective size of kernel image */
>>> +    .quad    0
>>> +    .dword    kernel_entry        /* Kernel entry point */
>>> +    .org    0x3c            /* 0x28 ~ 0x3b reserved */
>>>       .long    pe_header - _head    /* Offset to the PE header */
>>>     pe_header:
>>> diff --git a/arch/loongarch/kernel/machine_kexec.c
>>> b/arch/loongarch/kernel/machine_kexec.c
>>> new file mode 100644
>>> index 000000000000..4ffcd4cd9c8c
>>> --- /dev/null
>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>> @@ -0,0 +1,178 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * machine_kexec.c for kexec
>>> + *
>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>> + */
>>> +#include <linux/compiler.h>
>>> +#include <linux/cpu.h>
>>> +#include <linux/kexec.h>
>>> +#include <linux/mm.h>
>>> +#include <linux/delay.h>
>>> +#include <linux/libfdt.h>
>>> +#include <linux/of_fdt.h>
>>> +
>>> +#include <asm/bootinfo.h>
>>> +#include <asm/cacheflush.h>
>>> +#include <asm/page.h>
>>> +
>>> +/* 0x100000 ~ 0x200000 is safe */
>>> +#define KEXEC_CTRL_CODE    TO_CACHE(0x100000UL)
>>> +#define KEXEC_BLOB_ADDR    TO_CACHE(0x108000UL)
>>> +
>>> +static unsigned long reboot_code_buffer;
>>> +#ifdef CONFIG_SMP
>>> +void (*relocated_kexec_smp_wait)(void *);
>>> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
>>> +#endif
>>> +
>>> +static unsigned long jump_addr;
>>> +static unsigned long first_ind_entry;
>>> +static unsigned long boot_flag;
>>> +static unsigned long fdt_addr;
>>> +
>>> +static void kexec_image_info(const struct kimage *kimage)
>>> +{
>>> +    unsigned long i;
>>> +
>>> +    pr_debug("kexec kimage info:\n");
>>> +    pr_debug("\ttype:        %d\n", kimage->type);
>>> +    pr_debug("\tstart:       %lx\n", kimage->start);
>>> +    pr_debug("\thead:        %lx\n", kimage->head);
>>> +    pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
>>> +
>>> +    for (i = 0; i < kimage->nr_segments; i++) {
>>> +        pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
>>> +            kimage->segment[i].mem,
>>> +            kimage->segment[i].mem + kimage->segment[i].memsz);
>>> +        pr_debug("\t\t0x%lx bytes, %lu pages\n",
>>> +            (unsigned long)kimage->segment[i].memsz,
>>> +            (unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
>>> +    }
>>> +}
>>> +
>>> +int machine_kexec_prepare(struct kimage *kimage)
>>> +{
>>> +    int i;
>>> +    void *dtb = (void *)KEXEC_BLOB_ADDR;
>>> +
>>> +    kexec_image_info(kimage);
>>> +
>>> +    /* Find the Flattened Device Tree */
>>> +    for (i = 0; i < kimage->nr_segments; i++) {
>>> +        if (!fdt_check_header(kimage->segment[i].buf)) {
>>> +            memcpy(dtb, kimage->segment[i].buf, SZ_64K);
>>> +            kimage->arch.boot_flag = fw_arg0;
>>> +            kimage->arch.fdt_addr = (unsigned long) dtb;
>>> +            break;
>>> +        }
>>> +        continue;
>>> +    }
>>> +
>>> +    /* kexec need a safe page to save reboot_code_buffer */
>>> +    kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>> +
>>> +    reboot_code_buffer =
>>> +      (unsigned long)page_address(kimage->control_code_page);
>>> +    memcpy((void *)reboot_code_buffer, relocate_new_kernel,
>>> +           relocate_new_kernel_size);
>> It copies the same content to KEXEC_CTRL_CODE each time; could we do this
>> at boot time?
> I think it's possible to have the copy action happen at boot-time or
> during the prepare phase. (RISCV in prepare, MIPS in boot-time)
>
>>
>> BTW, our systems always keep the low 2MB unused, on mips-loongson and on
>> LoongArch. Is that necessary on LoongArch? We cannot use the parameter
>> 'mem=YYM' normally, but 'mem=YYM@2M' is OK. And the low 2MB is not
>> under virtual memory management, although the kernel can still access it.
> For existing kernels, the low 2M has been reserved by
> memblock_reserve(PHYS_OFFSET, 0x200000), maybe it is acceptable to keep
> the low 2M behavior.
>
> Yes, we need to use "mem=YM@2M" if the low 2M is reserved.
>
>>
>> In the kexec/kdump process, we can follow kimage_alloc_control_pages().
>> Once the boot CPU has finished copying the second kernel, all CPUs can
>> jump to a kernel-entry trampoline that lives in the kernel image. Then we
>> don't need to worry about that code being overwritten. The trampoline
>> reads its CPU id, keeps the non-boot CPUs waiting as kexec_smp_wait does,
>> and lets the boot CPU go on to the kernel entry. This way we can drop the
>> low-2MB reservation, IMO.
>
> It is also feasible to dynamically allocate control pages, but it is
> easier to use a low 2M approach. What do you think, Huacai?
>
>>
>>> +
>>> +    /* All secondary cpus now may jump to kexec_smp_wait cycle */
>>> +    relocated_kexec_smp_wait = reboot_code_buffer +
>>> +        (void *)(kexec_smp_wait - relocate_new_kernel);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +void machine_kexec_cleanup(struct kimage *kimage)
>>> +{
>>> +}
>>> +
>>> +#ifdef CONFIG_SMP
>>> +void kexec_reboot(void)
>>> +{
>>> +    do_kexec_t do_kexec = NULL;
>>> +
>>> +    /* All secondary cpus go to kexec_smp_wait */
>>> +    if (smp_processor_id() > 0) {
>>> +        relocated_kexec_smp_wait(NULL);
>>> +        unreachable();
>>> +    }
>>> +
>>> +    do_kexec = (void *)reboot_code_buffer;
>>> +    do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
>>> +
>>> +    unreachable();
>>> +}
>>> +
>>> +static void kexec_shutdown_secondary(void *)
>>> +{
>>> +    local_irq_disable();
>>> +    while (!atomic_read(&kexec_ready_to_reboot))
>>> +        cpu_relax();
>>> +
>>> +    kexec_reboot();
>>> +}
>>> +
>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>> +{
>>> +}
>>> +#endif
>>> +
>>> +void machine_shutdown(void)
>>> +{
>>> +    smp_call_function(kexec_shutdown_secondary, NULL, 0);
>>> +}
>>> +
>>> +void machine_kexec(struct kimage *image)
>>> +{
>>> +    unsigned long entry;
>>> +    unsigned long *ptr;
>>> +    struct kimage_arch *internal = &image->arch;
>>> +
>>> +    boot_flag = internal->boot_flag;
>>> +    fdt_addr = internal->fdt_addr;
>>> +
>>> +    jump_addr = (unsigned long)phys_to_virt(image->start);
>>> +
>>> +    first_ind_entry = (unsigned long)phys_to_virt(image->head &
>>> PAGE_MASK);
>>> +
>>> +    /*
>>> +     * The generic kexec code builds a page list with physical
>>> +     * addresses. they are directly accessible through XKPRANGE
>>> +     * hence the phys_to_virt() call.
>>> +     */
>>> +    for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
>>> +         ptr = (entry & IND_INDIRECTION) ?
>>> +           phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
>>> +        if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
>>> +            *ptr & IND_DESTINATION)
>>> +            *ptr = (unsigned long) phys_to_virt(*ptr);
>>> +    }
>>> +
>>> +    /* Mark offline before disabling local irq. */
>>> +    set_cpu_online(smp_processor_id(), false);
>>> +
>>> +    /* we do not want to be bothered. */
>>> +    local_irq_disable();
>>> +
>>> +    pr_notice("Will call new kernel at %lx\n", jump_addr);
>>> +    pr_notice("FDT image at %lx\n", fdt_addr);
>>> +    pr_notice("Bye ...\n");
>>> +
>>> +    /* Make reboot code buffer available to the boot CPU. */
>>> +    flush_cache_all();
>>> +
>>> +    atomic_set(&kexec_ready_to_reboot, 1);
>>> +
>>> +    /*
>>> +     * We know we were online, and there will be no incoming IPIs at
>>> +     * this point.
>>> +     */
>>> +    set_cpu_online(smp_processor_id(), true);
>>> +
>>> +    /* Ensure remote CPUs observe that we're online before
>>> rebooting. */
>>> +    smp_mb__after_atomic();
>>> +
>>> +    kexec_reboot();
>>> +}
>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S
>>> b/arch/loongarch/kernel/relocate_kernel.S
>>> new file mode 100644
>>> index 000000000000..d1f242f74ea8
>>> --- /dev/null
>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>> @@ -0,0 +1,125 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * relocate_kernel.S for kexec
>>> + *
>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>> + */
>>> +
>>> +#include <linux/kexec.h>
>>> +
>>> +#include <asm/asm.h>
>>> +#include <asm/asmmacro.h>
>>> +#include <asm/regdef.h>
>>> +#include <asm/loongarch.h>
>>> +#include <asm/stackframe.h>
>>> +#include <asm/addrspace.h>
>>> +
>>> +#define IPI_REG_BASE 0x1fe01000
>>> +
>>> +SYM_CODE_START(relocate_new_kernel)
>>> +    /*
>>> +     * s0: Boot flag passed to the new kernel
>>> +     * s1: Virt address of the FDT image
>>> +     * s2: Pointer to the current entry
>>> +     * s3: Virt address to jump to after relocation
>>> +     */
>>> +    move        s0, a0
>>> +    move        s1, a1
>>> +    move        s2, a2
>>> +    move        s3, a3
>>> +
>>> +process_entry:
>>> +    PTR_L        s4, s2, 0
>>> +    PTR_ADDI    s2, s2, SZREG
>>> +
>>> +    /* destination page */
>>> +    andi        s5, s4, IND_DESTINATION
>>> +    beqz        s5, 1f
>>> +    li.w        t0, ~0x1
>>> +    and        s6, s4, t0    /* store destination addr in s6 */
>>> +    b        process_entry
>>> +
>>> +1:
>>> +    /* indirection page, update s2    */
>>> +    andi        s5, s4, IND_INDIRECTION
>>> +    beqz        s5, 1f
>>> +    li.w        t0, ~0x2
>>> +    and        s2, s4, t0
>>> +    b        process_entry
>>> +
>>> +1:
>>> +    /* done page */
>>> +    andi        s5, s4, IND_DONE
>>> +    beqz        s5, 1f
>>> +    b        done
>>> +1:
>>> +    /* source page */
>>> +    andi        s5, s4, IND_SOURCE
>>> +    beqz        s5, process_entry
>>> +    li.w        t0, ~0x8
>>> +    and        s4, s4, t0
>>> +    li.w        s8, (1 << _PAGE_SHIFT) / SZREG
>>> +
>>> +copy_word:
>>> +    /* copy page word by word */
>>> +    REG_L        s7, s4, 0
>>> +    REG_S        s7, s6, 0
>>> +    PTR_ADDI    s6, s6, SZREG
>>> +    PTR_ADDI    s4, s4, SZREG
>>> +    LONG_ADDI    s8, s8, -1
>>> +    beqz        s8, process_entry
>>> +    b        copy_word
>>> +    b        process_entry
>>> +
>>> +done:
>>> +    dbar        0
>> ibar, too?
>
> Will add ibar 0.
>
>>> +
>>> +    move        a0, s0
>>> +    move        a1, s1
>>> +    /* jump to the new kernel */
>>> +    jr        s3
>>> +SYM_CODE_END(relocate_new_kernel)
>>> +
>>> +#ifdef CONFIG_SMP
>>> +/*
>>> + * Other CPUs should wait until code is relocated and
>>> + * then start at entry (?) point.
>>> + */
>>> +SYM_CODE_START(kexec_smp_wait)
>>> +    li.d        t0, IPI_REG_BASE
>>> +    li.d        t1, UNCACHE_BASE
>>> +    or        t0, t0, t1
>>> +
>>> +    /*
>>> +     * s1:initfn
>>> +     * t0:base t1:cpuid t2:node t3:core t4:count
>>> +     */
>>> +    csrrd        t1, LOONGARCH_CSR_CPUID
>>> +    andi        t1, t1, CSR_CPUID_COREID
>>> +    andi        t3, t1, 0x3
>>> +    slli.w        t3, t3, 8              /* get core id */
>>> +    or        t0, t0, t3
>>> +    andi        t2, t1, 0x3c
>>> +    slli.d        t2, t2, 42             /* get node id */
>>> +    or        t0, t0, t2
>>> +
>>> +1:    li.w        t4, 0x100              /* wait for init loop */
>>> +2:    addi.w        t4, t4, -1             /* limit mailbox access */
>>> +    bnez        t4, 2b
>>> +    ld.w        s1, t0, 0x20           /* check PC as an indicator */
>> Can we do this with iocsr*?
>
> OK, I will consider the implementation in the iocsr way.
>

The kexec_smp_wait implementation will be modified in the next release
as follows:

SYM_CODE_START(kexec_smp_wait)
1:      li.w            t0, 0x100               /* wait for init loop */
2:      addi.w          t0, t0, -1              /* limit mailbox access */
         bnez            t0, 2b
         li.w            t1, LOONGARCH_IOCSR_MBUF0
         iocsrrd.w       s1, t1                  /* check PC as an indicator */
         beqz            s1, 1b
         iocsrrd.d       s1, t1                  /* get PC via mailbox */

         li.d            t0, CACHE_BASE
         or              s1, s1, t0
         jr              s1                      /* jump to initial PC */
SYM_CODE_END(kexec_smp_wait)

1) Use the iocsr method.
2) Remove the sp and tp settings; sp and tp will be obtained from
cpuboot_data in smpboot_entry.

Thanks again Jinyang for the suggestion.


Youling.

> Thanks,
> Youling
>>
>> Thanks,
>> Jinyang
>>> +    beqz        s1, 1b
>>> +    ld.d        s1, t0, 0x20           /* get PC via mailbox */
>>> +    ld.d        sp, t0, 0x28           /* get SP via mailbox */
>>> +    ld.d        tp, t0, 0x30           /* get TP via mailbox */
>>> +
>>> +    li.d        t0, CACHE_BASE
>>> +    or        s1, s1, t0
>>> +    jr        s1                     /* jump to initial PC */
>>> +SYM_CODE_END(kexec_smp_wait)
>>> +#endif
>>> +
>>> +relocate_new_kernel_end:
>>> +
>>> +SYM_DATA_START(relocate_new_kernel_size)
>>> +    PTR        relocate_new_kernel_end - relocate_new_kernel
>>> +SYM_DATA_END(relocate_new_kernel_size)
>>
>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-08-29  4:37 ` [PATCH 2/3] LoongArch: Add kdump support Youling Tang
@ 2022-09-04 12:21   ` Huacai Chen
  2022-09-05  0:54     ` Youling Tang
  0 siblings, 1 reply; 20+ messages in thread
From: Huacai Chen @ 2022-09-04 12:21 UTC (permalink / raw)
  To: Youling Tang
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Youling,

I think crash.c can be merged into crash_dump.c

Huacai

On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>
> This patch adds support for kdump, the kernel will reserve a region
> for the crash kernel and jump there on panic.
>
> Arch-specific functions are added to allow for implementing a crash
> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>
> A user space tool, like kexec-tools, is responsible for allocating a
> separate region for the core's ELF header within crash kdump kernel
> memory and filling it in when executing kexec_load().
>
> Then, its location will be advertised to crash dump kernel via a new
> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>
> At the same time, it will also limit the crash kdump kernel to the
> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> so as not to destroy the original kernel dump data.
>
> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> with copy_oldmem_page().
>
> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test it
> by triggering a crash through /proc/sysrq-trigger:
>
>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>  # echo c > /proc/sysrq-trigger
>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> ---
>  arch/loongarch/Kconfig                  |  22 ++++++
>  arch/loongarch/Makefile                 |   4 +
>  arch/loongarch/kernel/Makefile          |   3 +-
>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>  arch/loongarch/kernel/mem.c             |   6 ++
>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>  arch/loongarch/kernel/traps.c           |   4 +
>  10 files changed, 217 insertions(+), 8 deletions(-)
>  create mode 100644 arch/loongarch/kernel/crash.c
>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 903c82fa958d..7c1b07a5b5bd 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -420,6 +420,28 @@ config KEXEC
>
>           The name comes from the similarity to the exec system call.
>
> +config CRASH_DUMP
> +       bool "Build kdump crash kernel"
> +       help
> +         Generate crash dump after being started by kexec. This should
> +         be normally only set in special crash dump kernels which are
> +         loaded in the main kernel with kexec-tools into a specially
> +         reserved region and then later executed after a crash by
> +         kdump/kexec.
> +
> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> +
> +config PHYSICAL_START
> +       hex "Physical address where the kernel is loaded"
> +       default "0x9000000091000000" if 64BIT
> +       depends on CRASH_DUMP
> +       help
> +         This gives the XKPRANGE address where the kernel is loaded.
> +         If you plan to use kernel for capturing the crash dump change
> +         this value to start of the reserved region (the "X" value as
> +         specified in the "crashkernel=YM@XM" command line boot parameter
> +         passed to the panic-ed kernel).
> +
>  config SECCOMP
>         bool "Enable seccomp to safely compute untrusted bytecode"
>         depends on PROC_FS
> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> index 4bc47f47cfd8..7dabd580426d 100644
> --- a/arch/loongarch/Makefile
> +++ b/arch/loongarch/Makefile
> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>  cflags-y += -ffreestanding
>  cflags-y += $(call cc-option, -mno-check-zero-division)
>
> +ifdef CONFIG_PHYSICAL_START
> +load-y         = $(CONFIG_PHYSICAL_START)
> +else
>  load-y         = 0x9000000000200000
> +endif
>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>
>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 20b64ac3f128..df5aea129364 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>
> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>
>  obj-$(CONFIG_PROC_FS)          += proc.o
>
> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> new file mode 100644
> index 000000000000..b4f249ec6301
> --- /dev/null
> +++ b/arch/loongarch/kernel/crash.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + *
> + * Derived from MIPS
> + */
> +#include <linux/kernel.h>
> +#include <linux/smp.h>
> +#include <linux/reboot.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
> +#include <linux/irq.h>
> +#include <linux/types.h>
> +#include <linux/sched.h>
> +#include <linux/sched/task_stack.h>
> +#include <asm/cacheflush.h>
> +#include <asm/kexec.h>
> +
> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> +
> +#ifdef CONFIG_SMP
> +static void crash_shutdown_secondary(void *passed_regs)
> +{
> +       struct pt_regs *regs = passed_regs;
> +       int cpu = smp_processor_id();
> +
> +       /*
> +        * If we are passed registers, use those.  Otherwise get the
> +        * regs from the last interrupt, which should be correct, as
> +        * we are in an interrupt.  But if the regs are not there,
> +        * pull them from the top of the stack.  They are probably
> +        * wrong, but we need something to keep from crashing again.
> +        */
> +       if (!regs)
> +               regs = get_irq_regs();
> +       if (!regs)
> +               regs = task_pt_regs(current);
> +
> +       local_irq_disable();
> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> +               crash_save_cpu(regs, cpu);
> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> +
> +       while (!atomic_read(&kexec_ready_to_reboot))
> +               cpu_relax();
> +
> +       kexec_reboot();
> +}
> +
> +/* Override the weak function in kernel/panic.c */
> +void crash_smp_send_stop(void)
> +{
> +       static int cpus_stopped;
> +       unsigned long timeout;
> +       unsigned int ncpus;
> +
> +       /*
> +        * This function can be called twice in panic path, but obviously
> +        * we execute this only once.
> +        */
> +       if (cpus_stopped)
> +               return;
> +
> +       cpus_stopped = 1;
> +
> +        /* Excluding the panic cpu */
> +       ncpus = num_online_cpus() - 1;
> +
> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> +       smp_wmb();
> +
> +       /*
> +        * The crash CPU sends an IPI and wait for other CPUs to
> +        * respond. Delay of at least 10 seconds.
> +        */
> +       pr_emerg("Sending IPI to other cpus...\n");
> +       timeout = USEC_PER_SEC * 10;
> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> +               cpu_relax();
> +               udelay(1);
> +       }
> +}
> +
> +#endif
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +       int crashing_cpu;
> +
> +       local_irq_disable();
> +
> +       crashing_cpu = smp_processor_id();
> +       crash_save_cpu(regs, crashing_cpu);
> +
> +       /* shutdown non-crashing cpus */
> +       crash_smp_send_stop();
> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> +
> +       pr_info("Starting crashdump kernel...\n");
> +}
> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> new file mode 100644
> index 000000000000..13e5d2f7870d
> --- /dev/null
> +++ b/arch/loongarch/kernel/crash_dump.c
> @@ -0,0 +1,19 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/highmem.h>
> +#include <linux/crash_dump.h>
> +#include <linux/io.h>
> +
> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> +                        size_t csize, unsigned long offset)
> +{
> +       void  *vaddr;
> +
> +       if (!csize)
> +               return 0;
> +
> +       vaddr = kmap_local_pfn(pfn);
> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> +       kunmap_local(vaddr);
> +
> +       return csize;
> +}
> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> --- a/arch/loongarch/kernel/machine_kexec.c
> +++ b/arch/loongarch/kernel/machine_kexec.c
> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>                 continue;
>         }
>
> -       /* kexec need a safe page to save reboot_code_buffer */
> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>
>         reboot_code_buffer =
> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>
>         kexec_reboot();
>  }
> -
> -void machine_crash_shutdown(struct pt_regs *regs)
> -{
> -}
>  #endif
>
>  void machine_shutdown(void)
> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>
>         jump_addr = (unsigned long)phys_to_virt(image->start);
>
> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> +       if (image->type == KEXEC_TYPE_DEFAULT)
> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>
>         /*
>          * The generic kexec code builds a page list with physical
> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>
>         /*
>          * We know we were online, and there will be no incoming IPIs at
> -        * this point.
> +        * this point. Mark online again before rebooting so that the crash
> +        * analysis tool will see us correctly.
>          */
>         set_cpu_online(smp_processor_id(), true);
>
> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> index 7423361b0ebc..c6def6ff81c8 100644
> --- a/arch/loongarch/kernel/mem.c
> +++ b/arch/loongarch/kernel/mem.c
> @@ -5,6 +5,7 @@
>  #include <linux/efi.h>
>  #include <linux/initrd.h>
>  #include <linux/memblock.h>
> +#include <linux/of_fdt.h>
>
>  #include <asm/bootinfo.h>
>  #include <asm/loongson.h>
> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>
>         /* Reserve the initrd */
>         reserve_initrd_mem();
> +
> +       /* Mainly reserved memory for the elf core head */
> +       early_init_fdt_scan_reserved_mem();
> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> +       early_init_dt_check_for_usable_mem_range();
>  }
> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> index d1f242f74ea8..4ee5ac4ac2d7 100644
> --- a/arch/loongarch/kernel/relocate_kernel.S
> +++ b/arch/loongarch/kernel/relocate_kernel.S
> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>         move            s2, a2
>         move            s3, a3
>
> +       /*
> +        * In case of a kdump/crash kernel, the indirection page is not
> +        * populated as the kernel is directly copied to a reserved location
> +        */
> +       beqz            s2, done
> +
>  process_entry:
>         PTR_L           s4, s2, 0
>         PTR_ADDI        s2, s2, SZREG
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index f938aae3e92c..ea34b77e402f 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -19,6 +19,8 @@
>  #include <linux/memblock.h>
>  #include <linux/initrd.h>
>  #include <linux/ioport.h>
> +#include <linux/kexec.h>
> +#include <linux/crash_dump.h>
>  #include <linux/root_dev.h>
>  #include <linux/console.h>
>  #include <linux/pfn.h>
> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>  }
>  early_param("mem", early_parse_mem);
>
> +static void __init loongarch_parse_crashkernel(void)
> +{
> +#ifdef CONFIG_KEXEC
> +       unsigned long long start;
> +       unsigned long long total_mem;
> +       unsigned long long crash_size, crash_base;
> +       int ret;
> +
> +       total_mem = memblock_phys_mem_size();
> +       ret = parse_crashkernel(boot_command_line, total_mem,
> +                               &crash_size, &crash_base);
> +       if (ret != 0 || crash_size <= 0)
> +               return;
> +
> +
> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> +                                       crash_base + crash_size);
> +       if (start != crash_base) {
> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> +               return;
> +       }
> +
> +       crashk_res.start = crash_base;
> +       crashk_res.end   = crash_base + crash_size - 1;
> +#endif
> +}
> +
> +static void __init request_crashkernel(struct resource *res)
> +{
> +#ifdef CONFIG_KEXEC
> +       int ret;
> +
> +       if (crashk_res.start == crashk_res.end)
> +               return;
> +
> +       ret = request_resource(res, &crashk_res);
> +       if (!ret)
> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> +                       (unsigned long)((crashk_res.end -
> +                                        crashk_res.start + 1) >> 20),
> +                       (unsigned long)(crashk_res.start  >> 20));
> +#endif
> +}
> +
>  void __init platform_init(void)
>  {
>         efi_init();
> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>
>         check_kernel_sections_mem();
>
> +       loongarch_parse_crashkernel();
> +
>         /*
>          * In order to reduce the possibility of kernel panic when failed to
>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>                 request_resource(res, &code_resource);
>                 request_resource(res, &data_resource);
>                 request_resource(res, &bss_resource);
> +               request_crashkernel(res);
>         }
>  }
>
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index aa1c95aaf595..0e610872f3f4 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -10,6 +10,7 @@
>  #include <linux/entry-common.h>
>  #include <linux/init.h>
>  #include <linux/kernel.h>
> +#include <linux/kexec.h>
>  #include <linux/module.h>
>  #include <linux/extable.h>
>  #include <linux/mm.h>
> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>
>         oops_exit();
>
> +       if (regs && kexec_should_crash(current))
> +               crash_kexec(regs);
> +
>         if (in_interrupt())
>                 panic("Fatal exception in interrupt");
>
> --
> 2.36.0
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-04 12:21   ` Huacai Chen
@ 2022-09-05  0:54     ` Youling Tang
  2022-09-05  1:38       ` Huacai Chen
  0 siblings, 1 reply; 20+ messages in thread
From: Youling Tang @ 2022-09-05  0:54 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Huacai

On 09/04/2022 08:21 PM, Huacai Chen wrote:
> Hi, Youling,
>
> I think crash.c can be merged into crash_dump.c

Most architectures only implement copy_oldmem_page() in crash_dump.c, so
I'm not sure whether merging crash.c into crash_dump.c would break that
consistency.

Thanks,
Youling

>
> Huacai
>
> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> This patch adds support for kdump, the kernel will reserve a region
>> for the crash kernel and jump there on panic.
>>
>> Arch-specific functions are added to allow for implementing a crash
>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>
>> A user space tool, like kexec-tools, is responsible for allocating a
>> separate region for the core's ELF header within crash kdump kernel
>> memory and filling it in when executing kexec_load().
>>
>> Then, its location will be advertised to crash dump kernel via a new
>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>
>> At the same time, it will also limit the crash kdump kernel to the
>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>> so as not to destroy the original kernel dump data.
>>
>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>> with copy_oldmem_page().
>>
>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>> it by triggering a crash through /proc/sysrq-trigger:
>>
>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>  # echo c > /proc/sysrq-trigger
>>
>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>> ---
>>  arch/loongarch/Kconfig                  |  22 ++++++
>>  arch/loongarch/Makefile                 |   4 +
>>  arch/loongarch/kernel/Makefile          |   3 +-
>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>  arch/loongarch/kernel/mem.c             |   6 ++
>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>  arch/loongarch/kernel/traps.c           |   4 +
>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>  create mode 100644 arch/loongarch/kernel/crash.c
>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 903c82fa958d..7c1b07a5b5bd 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -420,6 +420,28 @@ config KEXEC
>>
>>           The name comes from the similarity to the exec system call.
>>
>> +config CRASH_DUMP
>> +       bool "Build kdump crash kernel"
>> +       help
>> +         Generate crash dump after being started by kexec. This should
>> +         be normally only set in special crash dump kernels which are
>> +         loaded in the main kernel with kexec-tools into a specially
>> +         reserved region and then later executed after a crash by
>> +         kdump/kexec.
>> +
>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>> +
>> +config PHYSICAL_START
>> +       hex "Physical address where the kernel is loaded"
>> +       default "0x9000000091000000" if 64BIT
>> +       depends on CRASH_DUMP
>> +       help
>> +         This gives the XKPRANGE address where the kernel is loaded.
>> +         If you plan to use kernel for capturing the crash dump change
>> +         this value to start of the reserved region (the "X" value as
>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>> +         passed to the panic-ed kernel).
>> +
>>  config SECCOMP
>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>         depends on PROC_FS
>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>> index 4bc47f47cfd8..7dabd580426d 100644
>> --- a/arch/loongarch/Makefile
>> +++ b/arch/loongarch/Makefile
>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>  cflags-y += -ffreestanding
>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>
>> +ifdef CONFIG_PHYSICAL_START
>> +load-y         = $(CONFIG_PHYSICAL_START)
>> +else
>>  load-y         = 0x9000000000200000
>> +endif
>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>
>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>> index 20b64ac3f128..df5aea129364 100644
>> --- a/arch/loongarch/kernel/Makefile
>> +++ b/arch/loongarch/kernel/Makefile
>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>
>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>
>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>
>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>> new file mode 100644
>> index 000000000000..b4f249ec6301
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>> + *
>> + * Derived from MIPS
>> + */
>> +#include <linux/kernel.h>
>> +#include <linux/smp.h>
>> +#include <linux/reboot.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/delay.h>
>> +#include <linux/irq.h>
>> +#include <linux/types.h>
>> +#include <linux/sched.h>
>> +#include <linux/sched/task_stack.h>
>> +#include <asm/cacheflush.h>
>> +#include <asm/kexec.h>
>> +
>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>> +
>> +#ifdef CONFIG_SMP
>> +static void crash_shutdown_secondary(void *passed_regs)
>> +{
>> +       struct pt_regs *regs = passed_regs;
>> +       int cpu = smp_processor_id();
>> +
>> +       /*
>> +        * If we are passed registers, use those.  Otherwise get the
>> +        * regs from the last interrupt, which should be correct, as
>> +        * we are in an interrupt.  But if the regs are not there,
>> +        * pull them from the top of the stack.  They are probably
>> +        * wrong, but we need something to keep from crashing again.
>> +        */
>> +       if (!regs)
>> +               regs = get_irq_regs();
>> +       if (!regs)
>> +               regs = task_pt_regs(current);
>> +
>> +       local_irq_disable();
>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>> +               crash_save_cpu(regs, cpu);
>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>> +
>> +       while (!atomic_read(&kexec_ready_to_reboot))
>> +               cpu_relax();
>> +
>> +       kexec_reboot();
>> +}
>> +
>> +/* Override the weak function in kernel/panic.c */
>> +void crash_smp_send_stop(void)
>> +{
>> +       static int cpus_stopped;
>> +       unsigned long timeout;
>> +       unsigned int ncpus;
>> +
>> +       /*
>> +        * This function can be called twice in panic path, but obviously
>> +        * we execute this only once.
>> +        */
>> +       if (cpus_stopped)
>> +               return;
>> +
>> +       cpus_stopped = 1;
>> +
>> +        /* Excluding the panic cpu */
>> +       ncpus = num_online_cpus() - 1;
>> +
>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>> +       smp_wmb();
>> +
>> +       /*
>> +        * The crash CPU sends an IPI and wait for other CPUs to
>> +        * respond. Delay of at least 10 seconds.
>> +        */
>> +       pr_emerg("Sending IPI to other cpus...\n");
>> +       timeout = USEC_PER_SEC * 10;
>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>> +               cpu_relax();
>> +               udelay(1);
>> +       }
>> +}
>> +
>> +#endif
>> +
>> +void machine_crash_shutdown(struct pt_regs *regs)
>> +{
>> +       int crashing_cpu;
>> +
>> +       local_irq_disable();
>> +
>> +       crashing_cpu = smp_processor_id();
>> +       crash_save_cpu(regs, crashing_cpu);
>> +
>> +       /* shutdown non-crashing cpus */
>> +       crash_smp_send_stop();
>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>> +
>> +       pr_info("Starting crashdump kernel...\n");
>> +}
>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>> new file mode 100644
>> index 000000000000..13e5d2f7870d
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash_dump.c
>> @@ -0,0 +1,19 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/highmem.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/io.h>
>> +
>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>> +                        size_t csize, unsigned long offset)
>> +{
>> +       void  *vaddr;
>> +
>> +       if (!csize)
>> +               return 0;
>> +
>> +       vaddr = kmap_local_pfn(pfn);
>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>> +       kunmap_local(vaddr);
>> +
>> +       return csize;
>> +}
>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>> --- a/arch/loongarch/kernel/machine_kexec.c
>> +++ b/arch/loongarch/kernel/machine_kexec.c
>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>                 continue;
>>         }
>>
>> -       /* kexec need a safe page to save reboot_code_buffer */
>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>
>>         reboot_code_buffer =
>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>
>>         kexec_reboot();
>>  }
>> -
>> -void machine_crash_shutdown(struct pt_regs *regs)
>> -{
>> -}
>>  #endif
>>
>>  void machine_shutdown(void)
>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>
>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>
>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>
>>         /*
>>          * The generic kexec code builds a page list with physical
>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>
>>         /*
>>          * We know we were online, and there will be no incoming IPIs at
>> -        * this point.
>> +        * this point. Mark online again before rebooting so that the crash
>> +        * analysis tool will see us correctly.
>>          */
>>         set_cpu_online(smp_processor_id(), true);
>>
>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>> index 7423361b0ebc..c6def6ff81c8 100644
>> --- a/arch/loongarch/kernel/mem.c
>> +++ b/arch/loongarch/kernel/mem.c
>> @@ -5,6 +5,7 @@
>>  #include <linux/efi.h>
>>  #include <linux/initrd.h>
>>  #include <linux/memblock.h>
>> +#include <linux/of_fdt.h>
>>
>>  #include <asm/bootinfo.h>
>>  #include <asm/loongson.h>
>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>
>>         /* Reserve the initrd */
>>         reserve_initrd_mem();
>> +
>> +       /* Mainly reserved memory for the elf core head */
>> +       early_init_fdt_scan_reserved_mem();
>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>> +       early_init_dt_check_for_usable_mem_range();
>>  }
>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>> --- a/arch/loongarch/kernel/relocate_kernel.S
>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>         move            s2, a2
>>         move            s3, a3
>>
>> +       /*
>> +        * In case of a kdump/crash kernel, the indirection page is not
>> +        * populated as the kernel is directly copied to a reserved location
>> +        */
>> +       beqz            s2, done
>> +
>>  process_entry:
>>         PTR_L           s4, s2, 0
>>         PTR_ADDI        s2, s2, SZREG
>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>> index f938aae3e92c..ea34b77e402f 100644
>> --- a/arch/loongarch/kernel/setup.c
>> +++ b/arch/loongarch/kernel/setup.c
>> @@ -19,6 +19,8 @@
>>  #include <linux/memblock.h>
>>  #include <linux/initrd.h>
>>  #include <linux/ioport.h>
>> +#include <linux/kexec.h>
>> +#include <linux/crash_dump.h>
>>  #include <linux/root_dev.h>
>>  #include <linux/console.h>
>>  #include <linux/pfn.h>
>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>  }
>>  early_param("mem", early_parse_mem);
>>
>> +static void __init loongarch_parse_crashkernel(void)
>> +{
>> +#ifdef CONFIG_KEXEC
>> +       unsigned long long start;
>> +       unsigned long long total_mem;
>> +       unsigned long long crash_size, crash_base;
>> +       int ret;
>> +
>> +       total_mem = memblock_phys_mem_size();
>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>> +                               &crash_size, &crash_base);
>> +       if (ret != 0 || crash_size <= 0)
>> +               return;
>> +
>> +
>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>> +                                       crash_base + crash_size);
>> +       if (start != crash_base) {
>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>> +               return;
>> +       }
>> +
>> +       crashk_res.start = crash_base;
>> +       crashk_res.end   = crash_base + crash_size - 1;
>> +#endif
>> +}
>> +
>> +static void __init request_crashkernel(struct resource *res)
>> +{
>> +#ifdef CONFIG_KEXEC
>> +       int ret;
>> +
>> +       if (crashk_res.start == crashk_res.end)
>> +               return;
>> +
>> +       ret = request_resource(res, &crashk_res);
>> +       if (!ret)
>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>> +                       (unsigned long)((crashk_res.end -
>> +                                        crashk_res.start + 1) >> 20),
>> +                       (unsigned long)(crashk_res.start  >> 20));
>> +#endif
>> +}
>> +
>>  void __init platform_init(void)
>>  {
>>         efi_init();
>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>
>>         check_kernel_sections_mem();
>>
>> +       loongarch_parse_crashkernel();
>> +
>>         /*
>>          * In order to reduce the possibility of kernel panic when failed to
>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>                 request_resource(res, &code_resource);
>>                 request_resource(res, &data_resource);
>>                 request_resource(res, &bss_resource);
>> +               request_crashkernel(res);
>>         }
>>  }
>>
>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>> index aa1c95aaf595..0e610872f3f4 100644
>> --- a/arch/loongarch/kernel/traps.c
>> +++ b/arch/loongarch/kernel/traps.c
>> @@ -10,6 +10,7 @@
>>  #include <linux/entry-common.h>
>>  #include <linux/init.h>
>>  #include <linux/kernel.h>
>> +#include <linux/kexec.h>
>>  #include <linux/module.h>
>>  #include <linux/extable.h>
>>  #include <linux/mm.h>
>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>
>>         oops_exit();
>>
>> +       if (regs && kexec_should_crash(current))
>> +               crash_kexec(regs);
>> +
>>         if (in_interrupt())
>>                 panic("Fatal exception in interrupt");
>>
>> --
>> 2.36.0
>>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] LoongArch: Add kexec support
  2022-08-29  4:37 ` [PATCH 1/3] LoongArch: Add kexec support Youling Tang
  2022-08-30  1:53   ` Jinyang He
@ 2022-09-05  1:01   ` Youling Tang
  1 sibling, 0 replies; 20+ messages in thread
From: Youling Tang @ 2022-09-05  1:01 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, linux-kernel



On 08/29/2022 12:37 PM, Youling Tang wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
> LoongArch architecture that add support for the kexec re-boot mechanism
> (CONFIG_KEXEC) on LoongArch platforms.
>
> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
> PE format.
>
> I tested this on  LoongArch 3A5000 machine and works as expected,
>
>  $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
>  $ sudo kexec -e
>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> ---
>  arch/loongarch/Kconfig                  |  11 ++
>  arch/loongarch/include/asm/kexec.h      |  58 ++++++++
>  arch/loongarch/kernel/Makefile          |   2 +
>  arch/loongarch/kernel/head.S            |   7 +-
>  arch/loongarch/kernel/machine_kexec.c   | 178 ++++++++++++++++++++++++
>  arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
>  6 files changed, 380 insertions(+), 1 deletion(-)
>  create mode 100644 arch/loongarch/include/asm/kexec.h
>  create mode 100644 arch/loongarch/kernel/machine_kexec.c
>  create mode 100644 arch/loongarch/kernel/relocate_kernel.S
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 45364cffc793..903c82fa958d 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
>  	  The page size is not necessarily 4KB.  Keep this in mind
>  	  when choosing a value for this option.
>
> +config KEXEC
> +	bool "Kexec system call"
> +	select KEXEC_CORE
> +	help
> +	  kexec is a system call that implements the ability to shutdown your
> +	  current kernel, and to start another kernel.  It is like a reboot
> +	  but it is independent of the system firmware.   And like a reboot
> +	  you can start any kernel with it, not just Linux.
> +
> +	  The name comes from the similarity to the exec system call.
> +
>  config SECCOMP
>  	bool "Enable seccomp to safely compute untrusted bytecode"
>  	depends on PROC_FS
> diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h
> new file mode 100644
> index 000000000000..5c9e7b5eccb8
> --- /dev/null
> +++ b/arch/loongarch/include/asm/kexec.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * kexec.h for kexec
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#ifndef _ASM_KEXEC_H
> +#define _ASM_KEXEC_H
> +
> +#include <asm/stacktrace.h>
> +#include <asm/page.h>
> +
> +/* Maximum physical address we can use pages from */
> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> +/* Maximum address we can reach in physical address mode */
> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> + /* Maximum address we can use for the control code buffer */
> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> +
> +/* Reserve a page for the control code buffer */
> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
> +
> +/* The native architecture */
> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
> +
> +static inline void crash_setup_regs(struct pt_regs *newregs,
> +				    struct pt_regs *oldregs)
> +{
> +	if (oldregs)
> +		memcpy(newregs, oldregs, sizeof(*newregs));
> +	else
> +		prepare_frametrace(newregs);
> +}
> +
> +#define ARCH_HAS_KIMAGE_ARCH
> +
> +struct kimage_arch {
> +	unsigned long boot_flag;
> +	unsigned long fdt_addr;
> +};
> +
> +typedef void (*do_kexec_t)(unsigned long boot_flag,
> +			   unsigned long fdt_addr,
> +			   unsigned long first_ind_entry,
> +			   unsigned long jump_addr);
> +
> +struct kimage;
> +extern const unsigned char relocate_new_kernel[];
> +extern const size_t relocate_new_kernel_size;
> +
> +#ifdef CONFIG_SMP
> +extern atomic_t kexec_ready_to_reboot;
> +extern const unsigned char kexec_smp_wait[];
> +extern void kexec_reboot(void);
> +#endif
> +
> +#endif /* !_ASM_KEXEC_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index a213e994db68..20b64ac3f128 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
>  obj-$(CONFIG_MODULES)		+= module.o module-sections.o
>  obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
>
> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> +
>  obj-$(CONFIG_PROC_FS)		+= proc.o
>
>  obj-$(CONFIG_SMP)		+= smp.o
> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
> index 01bac62a6442..22bdf4928325 100644
> --- a/arch/loongarch/kernel/head.S
> +++ b/arch/loongarch/kernel/head.S
> @@ -20,7 +20,12 @@
>
>  _head:
>  	.word	MZ_MAGIC		/* "MZ", MS-DOS header */
> -	.org	0x3c			/* 0x04 ~ 0x3b reserved */
> +	.org	0x8
> +	.quad	0			/* Image load offset from start of RAM */
> +	.dword	_end - _text		/* Effective size of kernel image */
> +	.quad	0
> +	.dword	kernel_entry		/* Kernel entry point */
> +	.org	0x3c			/* 0x28 ~ 0x3b reserved */
>  	.long	pe_header - _head	/* Offset to the PE header */
>
>  pe_header:
> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> new file mode 100644
> index 000000000000..4ffcd4cd9c8c
> --- /dev/null
> +++ b/arch/loongarch/kernel/machine_kexec.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * machine_kexec.c for kexec
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +#include <linux/compiler.h>
> +#include <linux/cpu.h>
> +#include <linux/kexec.h>
> +#include <linux/mm.h>
> +#include <linux/delay.h>
> +#include <linux/libfdt.h>
> +#include <linux/of_fdt.h>
> +
> +#include <asm/bootinfo.h>
> +#include <asm/cacheflush.h>
> +#include <asm/page.h>
> +
> +/* 0x100000 ~ 0x200000 is safe */
> +#define KEXEC_CTRL_CODE	TO_CACHE(0x100000UL)
> +#define KEXEC_BLOB_ADDR	TO_CACHE(0x108000UL)
> +
> +static unsigned long reboot_code_buffer;
> +#ifdef CONFIG_SMP
> +void (*relocated_kexec_smp_wait)(void *);
> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
> +#endif
> +
> +static unsigned long jump_addr;
> +static unsigned long first_ind_entry;
> +static unsigned long boot_flag;
> +static unsigned long fdt_addr;
> +
> +static void kexec_image_info(const struct kimage *kimage)
> +{
> +	unsigned long i;
> +
> +	pr_debug("kexec kimage info:\n");
> +	pr_debug("\ttype:        %d\n", kimage->type);
> +	pr_debug("\tstart:       %lx\n", kimage->start);
> +	pr_debug("\thead:        %lx\n", kimage->head);
> +	pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
> +
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
> +			kimage->segment[i].mem,
> +			kimage->segment[i].mem + kimage->segment[i].memsz);
> +		pr_debug("\t\t0x%lx bytes, %lu pages\n",
> +			(unsigned long)kimage->segment[i].memsz,
> +			(unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
> +	}
> +}
> +
> +int machine_kexec_prepare(struct kimage *kimage)
> +{
> +	int i;
> +	void *dtb = (void *)KEXEC_BLOB_ADDR;
> +
> +	kexec_image_info(kimage);
> +
> +	/* Find the Flattened Device Tree */
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		if (!fdt_check_header(kimage->segment[i].buf)) {
> +			memcpy(dtb, kimage->segment[i].buf, SZ_64K);
> +			kimage->arch.boot_flag = fw_arg0;
> +			kimage->arch.fdt_addr = (unsigned long) dtb;
> +			break;
> +		}
> +		continue;
> +	}
> +
> +	/* kexec need a safe page to save reboot_code_buffer */
> +	kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> +
> +	reboot_code_buffer =
> +	  (unsigned long)page_address(kimage->control_code_page);
> +	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
> +	       relocate_new_kernel_size);
> +
> +	/* All secondary cpus now may jump to kexec_smp_wait cycle */
> +	relocated_kexec_smp_wait = reboot_code_buffer +
> +		(void *)(kexec_smp_wait - relocate_new_kernel);
> +
> +	return 0;
> +}
> +
> +void machine_kexec_cleanup(struct kimage *kimage)
> +{
> +}
> +
> +#ifdef CONFIG_SMP
> +void kexec_reboot(void)
> +{
> +	do_kexec_t do_kexec = NULL;
> +
> +	/* All secondary cpus go to kexec_smp_wait */
> +	if (smp_processor_id() > 0) {
> +		relocated_kexec_smp_wait(NULL);
> +		unreachable();
> +	}
> +
> +	do_kexec = (void *)reboot_code_buffer;
> +	do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
> +
> +	unreachable();
> +}

Self-check: kexec_reboot() is also needed in the !SMP case, so it must not
be guarded by CONFIG_SMP as a whole. It should be modified as follows:

void kexec_reboot(void)
{
         do_kexec_t do_kexec = NULL;

#ifdef CONFIG_SMP
         /* All secondary cpus go to kexec_smp_wait */
         if (smp_processor_id() > 0) {
                 relocated_kexec_smp_wait(NULL);
                 unreachable();
         }
#endif

         do_kexec = (void *)reboot_code_buffer;
         do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);

         unreachable();
}

Youling


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  0:54     ` Youling Tang
@ 2022-09-05  1:38       ` Huacai Chen
  2022-09-05  2:04         ` Youling Tang
  0 siblings, 1 reply; 20+ messages in thread
From: Huacai Chen @ 2022-09-05  1:38 UTC (permalink / raw)
  To: Youling Tang
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Youling,

On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > I think crash.c can be merged into crash_dump.c
>
> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> I'm not sure if merging crash.c into crash_dump.c will break its
> consistency?
>
> Thanks,
> Youling
Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
can be merged into machine_kexec.c, as arm64 and riscv do.

Huacai
>
> >
> > Huacai
> >
> > On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> This patch adds support for kdump, the kernel will reserve a region
> >> for the crash kernel and jump there on panic.
> >>
> >> Arch-specific functions are added to allow for implementing a crash
> >> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
> >>
> >> A user space tool, like kexec-tools, is responsible for allocating a
> >> separate region for the core's ELF header within crash kdump kernel
> >> memory and filling it in when executing kexec_load().
> >>
> >> Then, its location will be advertised to crash dump kernel via a new
> >> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>
> >> At the same time, it will also limit the crash kdump kernel to the
> >> crashkernel area via a new device-tree property, "linux, usable-memory-range",
> >> so as not to destroy the original kernel dump data.
> >>
> >> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >> with copy_oldmem_page().
> >>
> >> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
> >> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
> >> triggering a crash through /proc/sysrq_trigger:
> >>
> >>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>  # echo c > /proc/sysrq_trigger
> >>
> >> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >> ---
> >>  arch/loongarch/Kconfig                  |  22 ++++++
> >>  arch/loongarch/Makefile                 |   4 +
> >>  arch/loongarch/kernel/Makefile          |   3 +-
> >>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>  arch/loongarch/kernel/mem.c             |   6 ++
> >>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>  arch/loongarch/kernel/traps.c           |   4 +
> >>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>  create mode 100644 arch/loongarch/kernel/crash.c
> >>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>
> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >> index 903c82fa958d..7c1b07a5b5bd 100644
> >> --- a/arch/loongarch/Kconfig
> >> +++ b/arch/loongarch/Kconfig
> >> @@ -420,6 +420,28 @@ config KEXEC
> >>
> >>           The name comes from the similarity to the exec system call.
> >>
> >> +config CRASH_DUMP
> >> +       bool "Build kdump crash kernel"
> >> +       help
> >> +         Generate crash dump after being started by kexec. This should
> >> +         be normally only set in special crash dump kernels which are
> >> +         loaded in the main kernel with kexec-tools into a specially
> >> +         reserved region and then later executed after a crash by
> >> +         kdump/kexec.
> >> +
> >> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >> +
> >> +config PHYSICAL_START
> >> +       hex "Physical address where the kernel is loaded"
> >> +       default "0x9000000091000000" if 64BIT
> >> +       depends on CRASH_DUMP
> >> +       help
> >> +         This gives the XKPRANGE address where the kernel is loaded.
> >> +         If you plan to use kernel for capturing the crash dump change
> >> +         this value to start of the reserved region (the "X" value as
> >> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >> +         passed to the panic-ed kernel).
> >> +
> >>  config SECCOMP
> >>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>         depends on PROC_FS
> >> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >> index 4bc47f47cfd8..7dabd580426d 100644
> >> --- a/arch/loongarch/Makefile
> >> +++ b/arch/loongarch/Makefile
> >> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>  cflags-y += -ffreestanding
> >>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>
> >> +ifdef CONFIG_PHYSICAL_START
> >> +load-y         = $(CONFIG_PHYSICAL_START)
> >> +else
> >>  load-y         = 0x9000000000200000
> >> +endif
> >>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>
> >>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >> index 20b64ac3f128..df5aea129364 100644
> >> --- a/arch/loongarch/kernel/Makefile
> >> +++ b/arch/loongarch/kernel/Makefile
> >> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>
> >> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>
> >>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>
> >> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >> new file mode 100644
> >> index 000000000000..b4f249ec6301
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/crash.c
> >> @@ -0,0 +1,100 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >> + *
> >> + * Derived from MIPS
> >> + */
> >> +#include <linux/kernel.h>
> >> +#include <linux/smp.h>
> >> +#include <linux/reboot.h>
> >> +#include <linux/crash_dump.h>
> >> +#include <linux/delay.h>
> >> +#include <linux/irq.h>
> >> +#include <linux/types.h>
> >> +#include <linux/sched.h>
> >> +#include <linux/sched/task_stack.h>
> >> +#include <asm/cacheflush.h>
> >> +#include <asm/kexec.h>
> >> +
> >> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >> +
> >> +#ifdef CONFIG_SMP
> >> +static void crash_shutdown_secondary(void *passed_regs)
> >> +{
> >> +       struct pt_regs *regs = passed_regs;
> >> +       int cpu = smp_processor_id();
> >> +
> >> +       /*
> >> +        * If we are passed registers, use those.  Otherwise get the
> >> +        * regs from the last interrupt, which should be correct, as
> >> +        * we are in an interrupt.  But if the regs are not there,
> >> +        * pull them from the top of the stack.  They are probably
> >> +        * wrong, but we need something to keep from crashing again.
> >> +        */
> >> +       if (!regs)
> >> +               regs = get_irq_regs();
> >> +       if (!regs)
> >> +               regs = task_pt_regs(current);
> >> +
> >> +       local_irq_disable();
> >> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >> +               crash_save_cpu(regs, cpu);
> >> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >> +
> >> +       while (!atomic_read(&kexec_ready_to_reboot))
> >> +               cpu_relax();
> >> +
> >> +       kexec_reboot();
> >> +}
> >> +
> >> +/* Override the weak function in kernel/panic.c */
> >> +void crash_smp_send_stop(void)
> >> +{
> >> +       static int cpus_stopped;
> >> +       unsigned long timeout;
> >> +       unsigned int ncpus;
> >> +
> >> +       /*
> >> +        * This function can be called twice in panic path, but obviously
> >> +        * we execute this only once.
> >> +        */
> >> +       if (cpus_stopped)
> >> +               return;
> >> +
> >> +       cpus_stopped = 1;
> >> +
> >> +        /* Excluding the panic cpu */
> >> +       ncpus = num_online_cpus() - 1;
> >> +
> >> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >> +       smp_wmb();
> >> +
> >> +       /*
> >> +        * The crash CPU sends an IPI and wait for other CPUs to
> >> +        * respond. Delay of at least 10 seconds.
> >> +        */
> >> +       pr_emerg("Sending IPI to other cpus...\n");
> >> +       timeout = USEC_PER_SEC * 10;
> >> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >> +               cpu_relax();
> >> +               udelay(1);
> >> +       }
> >> +}
> >> +
> >> +#endif
> >> +
> >> +void machine_crash_shutdown(struct pt_regs *regs)
> >> +{
> >> +       int crashing_cpu;
> >> +
> >> +       local_irq_disable();
> >> +
> >> +       crashing_cpu = smp_processor_id();
> >> +       crash_save_cpu(regs, crashing_cpu);
> >> +
> >> +       /* shutdown non-crashing cpus */
> >> +       crash_smp_send_stop();
> >> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >> +
> >> +       pr_info("Starting crashdump kernel...\n");
> >> +}
> >> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >> new file mode 100644
> >> index 000000000000..13e5d2f7870d
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/crash_dump.c
> >> @@ -0,0 +1,19 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +#include <linux/highmem.h>
> >> +#include <linux/crash_dump.h>
> >> +#include <linux/io.h>
> >> +
> >> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >> +                        size_t csize, unsigned long offset)
> >> +{
> >> +       void  *vaddr;
> >> +
> >> +       if (!csize)
> >> +               return 0;
> >> +
> >> +       vaddr = kmap_local_pfn(pfn);
> >> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >> +       kunmap_local(vaddr);
> >> +
> >> +       return csize;
> >> +}
> >> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >> --- a/arch/loongarch/kernel/machine_kexec.c
> >> +++ b/arch/loongarch/kernel/machine_kexec.c
> >> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>                 continue;
> >>         }
> >>
> >> -       /* kexec need a safe page to save reboot_code_buffer */
> >> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>
> >>         reboot_code_buffer =
> >> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>
> >>         kexec_reboot();
> >>  }
> >> -
> >> -void machine_crash_shutdown(struct pt_regs *regs)
> >> -{
> >> -}
> >>  #endif
> >>
> >>  void machine_shutdown(void)
> >> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>
> >>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>
> >> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>
> >>         /*
> >>          * The generic kexec code builds a page list with physical
> >> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>
> >>         /*
> >>          * We know we were online, and there will be no incoming IPIs at
> >> -        * this point.
> >> +        * this point. Mark online again before rebooting so that the crash
> >> +        * analysis tool will see us correctly.
> >>          */
> >>         set_cpu_online(smp_processor_id(), true);
> >>
> >> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >> index 7423361b0ebc..c6def6ff81c8 100644
> >> --- a/arch/loongarch/kernel/mem.c
> >> +++ b/arch/loongarch/kernel/mem.c
> >> @@ -5,6 +5,7 @@
> >>  #include <linux/efi.h>
> >>  #include <linux/initrd.h>
> >>  #include <linux/memblock.h>
> >> +#include <linux/of_fdt.h>
> >>
> >>  #include <asm/bootinfo.h>
> >>  #include <asm/loongson.h>
> >> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>
> >>         /* Reserve the initrd */
> >>         reserve_initrd_mem();
> >> +
> >> +       /* Mainly reserved memory for the elf core head */
> >> +       early_init_fdt_scan_reserved_mem();
> >> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >> +       early_init_dt_check_for_usable_mem_range();
> >>  }
> >> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >> --- a/arch/loongarch/kernel/relocate_kernel.S
> >> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>         move            s2, a2
> >>         move            s3, a3
> >>
> >> +       /*
> >> +        * In case of a kdump/crash kernel, the indirection page is not
> >> +        * populated as the kernel is directly copied to a reserved location
> >> +        */
> >> +       beqz            s2, done
> >> +
> >>  process_entry:
> >>         PTR_L           s4, s2, 0
> >>         PTR_ADDI        s2, s2, SZREG
> >> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >> index f938aae3e92c..ea34b77e402f 100644
> >> --- a/arch/loongarch/kernel/setup.c
> >> +++ b/arch/loongarch/kernel/setup.c
> >> @@ -19,6 +19,8 @@
> >>  #include <linux/memblock.h>
> >>  #include <linux/initrd.h>
> >>  #include <linux/ioport.h>
> >> +#include <linux/kexec.h>
> >> +#include <linux/crash_dump.h>
> >>  #include <linux/root_dev.h>
> >>  #include <linux/console.h>
> >>  #include <linux/pfn.h>
> >> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>  }
> >>  early_param("mem", early_parse_mem);
> >>
> >> +static void __init loongarch_parse_crashkernel(void)
> >> +{
> >> +#ifdef CONFIG_KEXEC
> >> +       unsigned long long start;
> >> +       unsigned long long total_mem;
> >> +       unsigned long long crash_size, crash_base;
> >> +       int ret;
> >> +
> >> +       total_mem = memblock_phys_mem_size();
> >> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >> +                               &crash_size, &crash_base);
> >> +       if (ret != 0 || crash_size <= 0)
> >> +               return;
> >> +
> >> +
> >> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >> +                                       crash_base + crash_size);
> >> +       if (start != crash_base) {
> >> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >> +               return;
> >> +       }
> >> +
> >> +       crashk_res.start = crash_base;
> >> +       crashk_res.end   = crash_base + crash_size - 1;
> >> +#endif
> >> +}
> >> +
> >> +static void __init request_crashkernel(struct resource *res)
> >> +{
> >> +#ifdef CONFIG_KEXEC
> >> +       int ret;
> >> +
> >> +       if (crashk_res.start == crashk_res.end)
> >> +               return;
> >> +
> >> +       ret = request_resource(res, &crashk_res);
> >> +       if (!ret)
> >> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >> +                       (unsigned long)((crashk_res.end -
> >> +                                        crashk_res.start + 1) >> 20),
> >> +                       (unsigned long)(crashk_res.start  >> 20));
> >> +#endif
> >> +}
> >> +
> >>  void __init platform_init(void)
> >>  {
> >>         efi_init();
> >> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>
> >>         check_kernel_sections_mem();
> >>
> >> +       loongarch_parse_crashkernel();
> >> +
> >>         /*
> >>          * In order to reduce the possibility of kernel panic when failed to
> >>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>                 request_resource(res, &code_resource);
> >>                 request_resource(res, &data_resource);
> >>                 request_resource(res, &bss_resource);
> >> +               request_crashkernel(res);
> >>         }
> >>  }
> >>
> >> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >> index aa1c95aaf595..0e610872f3f4 100644
> >> --- a/arch/loongarch/kernel/traps.c
> >> +++ b/arch/loongarch/kernel/traps.c
> >> @@ -10,6 +10,7 @@
> >>  #include <linux/entry-common.h>
> >>  #include <linux/init.h>
> >>  #include <linux/kernel.h>
> >> +#include <linux/kexec.h>
> >>  #include <linux/module.h>
> >>  #include <linux/extable.h>
> >>  #include <linux/mm.h>
> >> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>
> >>         oops_exit();
> >>
> >> +       if (regs && kexec_should_crash(current))
> >> +               crash_kexec(regs);
> >> +
> >>         if (in_interrupt())
> >>                 panic("Fatal exception in interrupt");
> >>
> >> --
> >> 2.36.0
> >>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  1:38       ` Huacai Chen
@ 2022-09-05  2:04         ` Youling Tang
  2022-09-05  2:14           ` Huacai Chen
  0 siblings, 1 reply; 20+ messages in thread
From: Youling Tang @ 2022-09-05  2:04 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Huacai

On 09/05/2022 09:38 AM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> I think crash.c can be merged into crash_dump.c
>>
>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>> I'm not sure if merging crash.c into crash_dump.c will break its
>> consistency?
>>
>> Thanks,
>> Youling
> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> can be merged into machine_kexec.c, as arm64 and riscv do.

For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
into machine_kexec.c, should crash_shutdown_secondary and
crash_smp_send_stop be placed in smp.c?

Youling.
>
> Huacai
>>
>>>
>>> Huacai
>>>
>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> This patch adds support for kdump, the kernel will reserve a region
>>>> for the crash kernel and jump there on panic.
>>>>
>>>> Arch-specific functions are added to allow for implementing a crash
>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>>>>
>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>> separate region for the core's ELF header within crash kdump kernel
>>>> memory and filling it in when executing kexec_load().
>>>>
>>>> Then, its location will be advertised to crash dump kernel via a new
>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>
>>>> At the same time, it will also limit the crash kdump kernel to the
>>>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
>>>> so as not to destroy the original kernel dump data.
>>>>
>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>> with copy_oldmem_page().
>>>>
>>>> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
>>>> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
>>>> triggering a crash through /proc/sysrq_trigger:
>>>>
>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>  # echo c > /proc/sysrq_trigger
>>>>
>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>> ---
>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>  arch/loongarch/Makefile                 |   4 +
>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>
>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>> --- a/arch/loongarch/Kconfig
>>>> +++ b/arch/loongarch/Kconfig
>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>
>>>>           The name comes from the similarity to the exec system call.
>>>>
>>>> +config CRASH_DUMP
>>>> +       bool "Build kdump crash kernel"
>>>> +       help
>>>> +         Generate crash dump after being started by kexec. This should
>>>> +         be normally only set in special crash dump kernels which are
>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>> +         reserved region and then later executed after a crash by
>>>> +         kdump/kexec.
>>>> +
>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>> +
>>>> +config PHYSICAL_START
>>>> +       hex "Physical address where the kernel is loaded"
>>>> +       default "0x9000000091000000" if 64BIT
>>>> +       depends on CRASH_DUMP
>>>> +       help
>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>> +         If you plan to use kernel for capturing the crash dump change
>>>> +         this value to start of the reserved region (the "X" value as
>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>> +         passed to the panic-ed kernel).
>>>> +
>>>>  config SECCOMP
>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>         depends on PROC_FS
>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>> --- a/arch/loongarch/Makefile
>>>> +++ b/arch/loongarch/Makefile
>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>  cflags-y += -ffreestanding
>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>
>>>> +ifdef CONFIG_PHYSICAL_START
>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>> +else
>>>>  load-y         = 0x9000000000200000
>>>> +endif
>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>
>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>> index 20b64ac3f128..df5aea129364 100644
>>>> --- a/arch/loongarch/kernel/Makefile
>>>> +++ b/arch/loongarch/kernel/Makefile
>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>
>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>
>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>
>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>> new file mode 100644
>>>> index 000000000000..b4f249ec6301
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/crash.c
>>>> @@ -0,0 +1,100 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>> + *
>>>> + * Derived from MIPS
>>>> + */
>>>> +#include <linux/kernel.h>
>>>> +#include <linux/smp.h>
>>>> +#include <linux/reboot.h>
>>>> +#include <linux/crash_dump.h>
>>>> +#include <linux/delay.h>
>>>> +#include <linux/irq.h>
>>>> +#include <linux/types.h>
>>>> +#include <linux/sched.h>
>>>> +#include <linux/sched/task_stack.h>
>>>> +#include <asm/cacheflush.h>
>>>> +#include <asm/kexec.h>
>>>> +
>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>> +
>>>> +#ifdef CONFIG_SMP
>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>> +{
>>>> +       struct pt_regs *regs = passed_regs;
>>>> +       int cpu = smp_processor_id();
>>>> +
>>>> +       /*
>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>> +        * regs from the last interrupt, which should be correct, as
>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>> +        * pull them from the top of the stack.  They are probably
>>>> +        * wrong, but we need something to keep from crashing again.
>>>> +        */
>>>> +       if (!regs)
>>>> +               regs = get_irq_regs();
>>>> +       if (!regs)
>>>> +               regs = task_pt_regs(current);
>>>> +
>>>> +       local_irq_disable();
>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>> +               crash_save_cpu(regs, cpu);
>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>> +
>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>> +               cpu_relax();
>>>> +
>>>> +       kexec_reboot();
>>>> +}
>>>> +
>>>> +/* Override the weak function in kernel/panic.c */
>>>> +void crash_smp_send_stop(void)
>>>> +{
>>>> +       static int cpus_stopped;
>>>> +       unsigned long timeout;
>>>> +       unsigned int ncpus;
>>>> +
>>>> +       /*
>>>> +        * This function can be called twice in panic path, but obviously
>>>> +        * we execute this only once.
>>>> +        */
>>>> +       if (cpus_stopped)
>>>> +               return;
>>>> +
>>>> +       cpus_stopped = 1;
>>>> +
>>>> +        /* Excluding the panic cpu */
>>>> +       ncpus = num_online_cpus() - 1;
>>>> +
>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>> +       smp_wmb();
>>>> +
>>>> +       /*
>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>> +        * respond. Delay of at least 10 seconds.
>>>> +        */
>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>> +       timeout = USEC_PER_SEC * 10;
>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>> +               cpu_relax();
>>>> +               udelay(1);
>>>> +       }
>>>> +}
>>>> +
>>>> +#endif
>>>> +
>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>> +{
>>>> +       int crashing_cpu;
>>>> +
>>>> +       local_irq_disable();
>>>> +
>>>> +       crashing_cpu = smp_processor_id();
>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>> +
>>>> +       /* shutdown non-crashing cpus */
>>>> +       crash_smp_send_stop();
>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>> +
>>>> +       pr_info("Starting crashdump kernel...\n");
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>> new file mode 100644
>>>> index 000000000000..13e5d2f7870d
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>> @@ -0,0 +1,19 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +#include <linux/highmem.h>
>>>> +#include <linux/crash_dump.h>
>>>> +#include <linux/io.h>
>>>> +
>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>> +                        size_t csize, unsigned long offset)
>>>> +{
>>>> +       void  *vaddr;
>>>> +
>>>> +       if (!csize)
>>>> +               return 0;
>>>> +
>>>> +       vaddr = kmap_local_pfn(pfn);
>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>> +       kunmap_local(vaddr);
>>>> +
>>>> +       return csize;
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>                 continue;
>>>>         }
>>>>
>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>
>>>>         reboot_code_buffer =
>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>
>>>>         kexec_reboot();
>>>>  }
>>>> -
>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>> -{
>>>> -}
>>>>  #endif
>>>>
>>>>  void machine_shutdown(void)
>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>
>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>
>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>
>>>>         /*
>>>>          * The generic kexec code builds a page list with physical
>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>
>>>>         /*
>>>>          * We know we were online, and there will be no incoming IPIs at
>>>> -        * this point.
>>>> +        * this point. Mark online again before rebooting so that the crash
>>>> +        * analysis tool will see us correctly.
>>>>          */
>>>>         set_cpu_online(smp_processor_id(), true);
>>>>
>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>> --- a/arch/loongarch/kernel/mem.c
>>>> +++ b/arch/loongarch/kernel/mem.c
>>>> @@ -5,6 +5,7 @@
>>>>  #include <linux/efi.h>
>>>>  #include <linux/initrd.h>
>>>>  #include <linux/memblock.h>
>>>> +#include <linux/of_fdt.h>
>>>>
>>>>  #include <asm/bootinfo.h>
>>>>  #include <asm/loongson.h>
>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>
>>>>         /* Reserve the initrd */
>>>>         reserve_initrd_mem();
>>>> +
>>>> +       /* Mainly reserved memory for the elf core head */
>>>> +       early_init_fdt_scan_reserved_mem();
>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>  }
>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>         move            s2, a2
>>>>         move            s3, a3
>>>>
>>>> +       /*
>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>> +        * populated as the kernel is directly copied to a reserved location
>>>> +        */
>>>> +       beqz            s2, done
>>>> +
>>>>  process_entry:
>>>>         PTR_L           s4, s2, 0
>>>>         PTR_ADDI        s2, s2, SZREG
>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>> index f938aae3e92c..ea34b77e402f 100644
>>>> --- a/arch/loongarch/kernel/setup.c
>>>> +++ b/arch/loongarch/kernel/setup.c
>>>> @@ -19,6 +19,8 @@
>>>>  #include <linux/memblock.h>
>>>>  #include <linux/initrd.h>
>>>>  #include <linux/ioport.h>
>>>> +#include <linux/kexec.h>
>>>> +#include <linux/crash_dump.h>
>>>>  #include <linux/root_dev.h>
>>>>  #include <linux/console.h>
>>>>  #include <linux/pfn.h>
>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>  }
>>>>  early_param("mem", early_parse_mem);
>>>>
>>>> +static void __init loongarch_parse_crashkernel(void)
>>>> +{
>>>> +#ifdef CONFIG_KEXEC
>>>> +       unsigned long long start;
>>>> +       unsigned long long total_mem;
>>>> +       unsigned long long crash_size, crash_base;
>>>> +       int ret;
>>>> +
>>>> +       total_mem = memblock_phys_mem_size();
>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>> +                               &crash_size, &crash_base);
>>>> +       if (ret != 0 || crash_size <= 0)
>>>> +               return;
>>>> +
>>>> +
>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>> +                                       crash_base + crash_size);
>>>> +       if (start != crash_base) {
>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       crashk_res.start = crash_base;
>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>> +#endif
>>>> +}
>>>> +
>>>> +static void __init request_crashkernel(struct resource *res)
>>>> +{
>>>> +#ifdef CONFIG_KEXEC
>>>> +       int ret;
>>>> +
>>>> +       if (crashk_res.start == crashk_res.end)
>>>> +               return;
>>>> +
>>>> +       ret = request_resource(res, &crashk_res);
>>>> +       if (!ret)
>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>> +                       (unsigned long)((crashk_res.end -
>>>> +                                        crashk_res.start + 1) >> 20),
>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>> +#endif
>>>> +}
>>>> +
>>>>  void __init platform_init(void)
>>>>  {
>>>>         efi_init();
>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>
>>>>         check_kernel_sections_mem();
>>>>
>>>> +       loongarch_parse_crashkernel();
>>>> +
>>>>         /*
>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>                 request_resource(res, &code_resource);
>>>>                 request_resource(res, &data_resource);
>>>>                 request_resource(res, &bss_resource);
>>>> +               request_crashkernel(res);
>>>>         }
>>>>  }
>>>>
>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>> --- a/arch/loongarch/kernel/traps.c
>>>> +++ b/arch/loongarch/kernel/traps.c
>>>> @@ -10,6 +10,7 @@
>>>>  #include <linux/entry-common.h>
>>>>  #include <linux/init.h>
>>>>  #include <linux/kernel.h>
>>>> +#include <linux/kexec.h>
>>>>  #include <linux/module.h>
>>>>  #include <linux/extable.h>
>>>>  #include <linux/mm.h>
>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>
>>>>         oops_exit();
>>>>
>>>> +       if (regs && kexec_should_crash(current))
>>>> +               crash_kexec(regs);
>>>> +
>>>>         if (in_interrupt())
>>>>                 panic("Fatal exception in interrupt");
>>>>
>>>> --
>>>> 2.36.0
>>>>
>>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  2:04         ` Youling Tang
@ 2022-09-05  2:14           ` Huacai Chen
  2022-09-05  2:21             ` Youling Tang
  0 siblings, 1 reply; 20+ messages in thread
From: Huacai Chen @ 2022-09-05  2:14 UTC (permalink / raw)
  To: Youling Tang
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> Hi, Huacai
> >>
> >> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>> Hi, Youling,
> >>>
> >>> I think crash.c can be merged into crash_dump.c
> >>
> >> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >> I'm not sure if merging crash.c into crash_dump.c will break its
> >> consistency?
> >>
> >> Thanks,
> >> Youling
> > Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> > can be merged into machine_kexec.c, as arm64 and riscv do.
>
> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> into machine_kexec.c, should crash_shutdown_secondary and
> crash_smp_send_stop be placed in smp.c?
I don't want to touch smp.c; merging everything into machine_kexec.c seems reasonable.

Huacai
>
> Youling.
> >
> > Huacai
> >>
> >>>
> >>> Huacai
> >>>
> >>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> This patch adds support for kdump, the kernel will reserve a region
> >>>> for the crash kernel and jump there on panic.
> >>>>
> >>>> Arch-specific functions are added to allow for implementing a crash
> >>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>>>
> >>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>> separate region for the core's ELF header within crash kdump kernel
> >>>> memory and filling it in when executing kexec_load().
> >>>>
> >>>> Then, its location will be advertised to crash dump kernel via a new
> >>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>
> >>>> At the same time, it will also limit the crash kdump kernel to the
> >>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>> so as not to destroy the original kernel dump data.
> >>>>
> >>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>> with copy_oldmem_page().
> >>>>
> >>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> >>>> it by triggering a crash through /proc/sysrq-trigger:
> >>>>
> >>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>  # echo c > /proc/sysrq-trigger
> >>>>
> >>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>> ---
> >>>>  arch/loongarch/Kconfig                  |  22 ++++++
> >>>>  arch/loongarch/Makefile                 |   4 +
> >>>>  arch/loongarch/kernel/Makefile          |   3 +-
> >>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>>>  arch/loongarch/kernel/mem.c             |   6 ++
> >>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>>>  arch/loongarch/kernel/traps.c           |   4 +
> >>>>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>  create mode 100644 arch/loongarch/kernel/crash.c
> >>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>
> >>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>> --- a/arch/loongarch/Kconfig
> >>>> +++ b/arch/loongarch/Kconfig
> >>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>
> >>>>           The name comes from the similarity to the exec system call.
> >>>>
> >>>> +config CRASH_DUMP
> >>>> +       bool "Build kdump crash kernel"
> >>>> +       help
> >>>> +         Generate crash dump after being started by kexec. This should
> >>>> +         be normally only set in special crash dump kernels which are
> >>>> +         loaded in the main kernel with kexec-tools into a specially
> >>>> +         reserved region and then later executed after a crash by
> >>>> +         kdump/kexec.
> >>>> +
> >>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>> +
> >>>> +config PHYSICAL_START
> >>>> +       hex "Physical address where the kernel is loaded"
> >>>> +       default "0x9000000091000000" if 64BIT
> >>>> +       depends on CRASH_DUMP
> >>>> +       help
> >>>> +         This gives the XKPRANGE address where the kernel is loaded.
> >>>> +         If you plan to use kernel for capturing the crash dump change
> >>>> +         this value to start of the reserved region (the "X" value as
> >>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >>>> +         passed to the panic-ed kernel).
> >>>> +
> >>>>  config SECCOMP
> >>>>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>         depends on PROC_FS
> >>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>> --- a/arch/loongarch/Makefile
> >>>> +++ b/arch/loongarch/Makefile
> >>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>  cflags-y += -ffreestanding
> >>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>
> >>>> +ifdef CONFIG_PHYSICAL_START
> >>>> +load-y         = $(CONFIG_PHYSICAL_START)
> >>>> +else
> >>>>  load-y         = 0x9000000000200000
> >>>> +endif
> >>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>
> >>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>> index 20b64ac3f128..df5aea129364 100644
> >>>> --- a/arch/loongarch/kernel/Makefile
> >>>> +++ b/arch/loongarch/kernel/Makefile
> >>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>>>
> >>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>>>
> >>>>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>> new file mode 100644
> >>>> index 000000000000..b4f249ec6301
> >>>> --- /dev/null
> >>>> +++ b/arch/loongarch/kernel/crash.c
> >>>> @@ -0,0 +1,100 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +/*
> >>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>> + *
> >>>> + * Derived from MIPS
> >>>> + */
> >>>> +#include <linux/kernel.h>
> >>>> +#include <linux/smp.h>
> >>>> +#include <linux/reboot.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> +#include <linux/delay.h>
> >>>> +#include <linux/irq.h>
> >>>> +#include <linux/types.h>
> >>>> +#include <linux/sched.h>
> >>>> +#include <linux/sched/task_stack.h>
> >>>> +#include <asm/cacheflush.h>
> >>>> +#include <asm/kexec.h>
> >>>> +
> >>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>> +
> >>>> +#ifdef CONFIG_SMP
> >>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>> +{
> >>>> +       struct pt_regs *regs = passed_regs;
> >>>> +       int cpu = smp_processor_id();
> >>>> +
> >>>> +       /*
> >>>> +        * If we are passed registers, use those.  Otherwise get the
> >>>> +        * regs from the last interrupt, which should be correct, as
> >>>> +        * we are in an interrupt.  But if the regs are not there,
> >>>> +        * pull them from the top of the stack.  They are probably
> >>>> +        * wrong, but we need something to keep from crashing again.
> >>>> +        */
> >>>> +       if (!regs)
> >>>> +               regs = get_irq_regs();
> >>>> +       if (!regs)
> >>>> +               regs = task_pt_regs(current);
> >>>> +
> >>>> +       local_irq_disable();
> >>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>> +               crash_save_cpu(regs, cpu);
> >>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>> +
> >>>> +       while (!atomic_read(&kexec_ready_to_reboot))
> >>>> +               cpu_relax();
> >>>> +
> >>>> +       kexec_reboot();
> >>>> +}
> >>>> +
> >>>> +/* Override the weak function in kernel/panic.c */
> >>>> +void crash_smp_send_stop(void)
> >>>> +{
> >>>> +       static int cpus_stopped;
> >>>> +       unsigned long timeout;
> >>>> +       unsigned int ncpus;
> >>>> +
> >>>> +       /*
> >>>> +        * This function can be called twice in panic path, but obviously
> >>>> +        * we execute this only once.
> >>>> +        */
> >>>> +       if (cpus_stopped)
> >>>> +               return;
> >>>> +
> >>>> +       cpus_stopped = 1;
> >>>> +
> >>>> +        /* Excluding the panic cpu */
> >>>> +       ncpus = num_online_cpus() - 1;
> >>>> +
> >>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>> +       smp_wmb();
> >>>> +
> >>>> +       /*
> >>>> +        * The crash CPU sends an IPI and wait for other CPUs to
> >>>> +        * respond. Delay of at least 10 seconds.
> >>>> +        */
> >>>> +       pr_emerg("Sending IPI to other cpus...\n");
> >>>> +       timeout = USEC_PER_SEC * 10;
> >>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>> +               cpu_relax();
> >>>> +               udelay(1);
> >>>> +       }
> >>>> +}
> >>>> +
> >>>> +#endif
> >>>> +
> >>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>> +{
> >>>> +       int crashing_cpu;
> >>>> +
> >>>> +       local_irq_disable();
> >>>> +
> >>>> +       crashing_cpu = smp_processor_id();
> >>>> +       crash_save_cpu(regs, crashing_cpu);
> >>>> +
> >>>> +       /* shutdown non-crashing cpus */
> >>>> +       crash_smp_send_stop();
> >>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>> +
> >>>> +       pr_info("Starting crashdump kernel...\n");
> >>>> +}
> >>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>> new file mode 100644
> >>>> index 000000000000..13e5d2f7870d
> >>>> --- /dev/null
> >>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>> @@ -0,0 +1,19 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +#include <linux/highmem.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> +#include <linux/io.h>
> >>>> +
> >>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>> +                        size_t csize, unsigned long offset)
> >>>> +{
> >>>> +       void  *vaddr;
> >>>> +
> >>>> +       if (!csize)
> >>>> +               return 0;
> >>>> +
> >>>> +       vaddr = kmap_local_pfn(pfn);
> >>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>> +       kunmap_local(vaddr);
> >>>> +
> >>>> +       return csize;
> >>>> +}
> >>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>                 continue;
> >>>>         }
> >>>>
> >>>> -       /* kexec need a safe page to save reboot_code_buffer */
> >>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>
> >>>>         reboot_code_buffer =
> >>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>
> >>>>         kexec_reboot();
> >>>>  }
> >>>> -
> >>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>> -{
> >>>> -}
> >>>>  #endif
> >>>>
> >>>>  void machine_shutdown(void)
> >>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>
> >>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>
> >>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>
> >>>>         /*
> >>>>          * The generic kexec code builds a page list with physical
> >>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>
> >>>>         /*
> >>>>          * We know we were online, and there will be no incoming IPIs at
> >>>> -        * this point.
> >>>> +        * this point. Mark online again before rebooting so that the crash
> >>>> +        * analysis tool will see us correctly.
> >>>>          */
> >>>>         set_cpu_online(smp_processor_id(), true);
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>> --- a/arch/loongarch/kernel/mem.c
> >>>> +++ b/arch/loongarch/kernel/mem.c
> >>>> @@ -5,6 +5,7 @@
> >>>>  #include <linux/efi.h>
> >>>>  #include <linux/initrd.h>
> >>>>  #include <linux/memblock.h>
> >>>> +#include <linux/of_fdt.h>
> >>>>
> >>>>  #include <asm/bootinfo.h>
> >>>>  #include <asm/loongson.h>
> >>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>
> >>>>         /* Reserve the initrd */
> >>>>         reserve_initrd_mem();
> >>>> +
> >>>> +       /* Mainly reserved memory for the elf core head */
> >>>> +       early_init_fdt_scan_reserved_mem();
> >>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>> +       early_init_dt_check_for_usable_mem_range();
> >>>>  }
> >>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>         move            s2, a2
> >>>>         move            s3, a3
> >>>>
> >>>> +       /*
> >>>> +        * In case of a kdump/crash kernel, the indirection page is not
> >>>> +        * populated as the kernel is directly copied to a reserved location
> >>>> +        */
> >>>> +       beqz            s2, done
> >>>> +
> >>>>  process_entry:
> >>>>         PTR_L           s4, s2, 0
> >>>>         PTR_ADDI        s2, s2, SZREG
> >>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>> index f938aae3e92c..ea34b77e402f 100644
> >>>> --- a/arch/loongarch/kernel/setup.c
> >>>> +++ b/arch/loongarch/kernel/setup.c
> >>>> @@ -19,6 +19,8 @@
> >>>>  #include <linux/memblock.h>
> >>>>  #include <linux/initrd.h>
> >>>>  #include <linux/ioport.h>
> >>>> +#include <linux/kexec.h>
> >>>> +#include <linux/crash_dump.h>
> >>>>  #include <linux/root_dev.h>
> >>>>  #include <linux/console.h>
> >>>>  #include <linux/pfn.h>
> >>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>  }
> >>>>  early_param("mem", early_parse_mem);
> >>>>
> >>>> +static void __init loongarch_parse_crashkernel(void)
> >>>> +{
> >>>> +#ifdef CONFIG_KEXEC
> >>>> +       unsigned long long start;
> >>>> +       unsigned long long total_mem;
> >>>> +       unsigned long long crash_size, crash_base;
> >>>> +       int ret;
> >>>> +
> >>>> +       total_mem = memblock_phys_mem_size();
> >>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >>>> +                               &crash_size, &crash_base);
> >>>> +       if (ret != 0 || crash_size <= 0)
> >>>> +               return;
> >>>> +
> >>>> +
> >>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>> +                                       crash_base + crash_size);
> >>>> +       if (start != crash_base) {
> >>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>> +               return;
> >>>> +       }
> >>>> +
> >>>> +       crashk_res.start = crash_base;
> >>>> +       crashk_res.end   = crash_base + crash_size - 1;
> >>>> +#endif
> >>>> +}
> >>>> +
> >>>> +static void __init request_crashkernel(struct resource *res)
> >>>> +{
> >>>> +#ifdef CONFIG_KEXEC
> >>>> +       int ret;
> >>>> +
> >>>> +       if (crashk_res.start == crashk_res.end)
> >>>> +               return;
> >>>> +
> >>>> +       ret = request_resource(res, &crashk_res);
> >>>> +       if (!ret)
> >>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>> +                       (unsigned long)((crashk_res.end -
> >>>> +                                        crashk_res.start + 1) >> 20),
> >>>> +                       (unsigned long)(crashk_res.start  >> 20));
> >>>> +#endif
> >>>> +}
> >>>> +
> >>>>  void __init platform_init(void)
> >>>>  {
> >>>>         efi_init();
> >>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>
> >>>>         check_kernel_sections_mem();
> >>>>
> >>>> +       loongarch_parse_crashkernel();
> >>>> +
> >>>>         /*
> >>>>          * In order to reduce the possibility of kernel panic when failed to
> >>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>                 request_resource(res, &code_resource);
> >>>>                 request_resource(res, &data_resource);
> >>>>                 request_resource(res, &bss_resource);
> >>>> +               request_crashkernel(res);
> >>>>         }
> >>>>  }
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>> --- a/arch/loongarch/kernel/traps.c
> >>>> +++ b/arch/loongarch/kernel/traps.c
> >>>> @@ -10,6 +10,7 @@
> >>>>  #include <linux/entry-common.h>
> >>>>  #include <linux/init.h>
> >>>>  #include <linux/kernel.h>
> >>>> +#include <linux/kexec.h>
> >>>>  #include <linux/module.h>
> >>>>  #include <linux/extable.h>
> >>>>  #include <linux/mm.h>
> >>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>
> >>>>         oops_exit();
> >>>>
> >>>> +       if (regs && kexec_should_crash(current))
> >>>> +               crash_kexec(regs);
> >>>> +
> >>>>         if (in_interrupt())
> >>>>                 panic("Fatal exception in interrupt");
> >>>>
> >>>> --
> >>>> 2.36.0
> >>>>
> >>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  2:14           ` Huacai Chen
@ 2022-09-05  2:21             ` Youling Tang
  2022-09-05  7:32               ` Huacai Chen
  0 siblings, 1 reply; 20+ messages in thread
From: Youling Tang @ 2022-09-05  2:21 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML



On 09/05/2022 10:14 AM, Huacai Chen wrote:
> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> Hi, Huacai
>>>>
>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>> Hi, Youling,
>>>>>
>>>>> I think crash.c can be merged into crash_dump.c
>>>>
>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>> consistency?
>>>>
>>>> Thanks,
>>>> Youling
>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>
>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>> into machine_kexec.c, should crash_shutdown_secondary and
>> crash_smp_send_stop be placed in smp.c?
> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.

Ok, I'll merge all into machine_kexec.c.

Youling.

>
> Huacai
>>
>> Youling.
>>>
>>> Huacai
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>> for the crash kernel and jump there on panic.
>>>>>>
>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>>>
>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>> memory and filling it in when executing kexec_load().
>>>>>>
>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>
>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>> so as not to destroy the original kernel dump data.
>>>>>>
>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>> with copy_oldmem_page().
>>>>>>
>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>>>
>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>  # echo c > /proc/sysrq-trigger
>>>>>>
>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>> ---
>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>>>  arch/loongarch/Makefile                 |   4 +
>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>
>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>> --- a/arch/loongarch/Kconfig
>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>
>>>>>>           The name comes from the similarity to the exec system call.
>>>>>>
>>>>>> +config CRASH_DUMP
>>>>>> +       bool "Build kdump crash kernel"
>>>>>> +       help
>>>>>> +         Generate crash dump after being started by kexec. This should
>>>>>> +         be normally only set in special crash dump kernels which are
>>>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>>>> +         reserved region and then later executed after a crash by
>>>>>> +         kdump/kexec.
>>>>>> +
>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>> +
>>>>>> +config PHYSICAL_START
>>>>>> +       hex "Physical address where the kernel is loaded"
>>>>>> +       default "0x9000000091000000" if 64BIT
>>>>>> +       depends on CRASH_DUMP
>>>>>> +       help
>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>>>> +         If you plan to use kernel for capturing the crash dump change
>>>>>> +         this value to start of the reserved region (the "X" value as
>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>> +         passed to the panic-ed kernel).
>>>>>> +
>>>>>>  config SECCOMP
>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>         depends on PROC_FS
>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>> --- a/arch/loongarch/Makefile
>>>>>> +++ b/arch/loongarch/Makefile
>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>  cflags-y += -ffreestanding
>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>
>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>>>> +else
>>>>>>  load-y         = 0x9000000000200000
>>>>>> +endif
>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>
>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>>>
>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>>>
>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..b4f249ec6301
>>>>>> --- /dev/null
>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>> @@ -0,0 +1,100 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +/*
>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>> + *
>>>>>> + * Derived from MIPS
>>>>>> + */
>>>>>> +#include <linux/kernel.h>
>>>>>> +#include <linux/smp.h>
>>>>>> +#include <linux/reboot.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> +#include <linux/delay.h>
>>>>>> +#include <linux/irq.h>
>>>>>> +#include <linux/types.h>
>>>>>> +#include <linux/sched.h>
>>>>>> +#include <linux/sched/task_stack.h>
>>>>>> +#include <asm/cacheflush.h>
>>>>>> +#include <asm/kexec.h>
>>>>>> +
>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>> +
>>>>>> +#ifdef CONFIG_SMP
>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>> +{
>>>>>> +       struct pt_regs *regs = passed_regs;
>>>>>> +       int cpu = smp_processor_id();
>>>>>> +
>>>>>> +       /*
>>>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>>>> +        * regs from the last interrupt, which should be correct, as
>>>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>>>> +        * pull them from the top of the stack.  They are probably
>>>>>> +        * wrong, but we need something to keep from crashing again.
>>>>>> +        */
>>>>>> +       if (!regs)
>>>>>> +               regs = get_irq_regs();
>>>>>> +       if (!regs)
>>>>>> +               regs = task_pt_regs(current);
>>>>>> +
>>>>>> +       local_irq_disable();
>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>> +               crash_save_cpu(regs, cpu);
>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>> +
>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>>>> +               cpu_relax();
>>>>>> +
>>>>>> +       kexec_reboot();
>>>>>> +}
>>>>>> +
>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>> +void crash_smp_send_stop(void)
>>>>>> +{
>>>>>> +       static int cpus_stopped;
>>>>>> +       unsigned long timeout;
>>>>>> +       unsigned int ncpus;
>>>>>> +
>>>>>> +       /*
>>>>>> +        * This function can be called twice in panic path, but obviously
>>>>>> +        * we execute this only once.
>>>>>> +        */
>>>>>> +       if (cpus_stopped)
>>>>>> +               return;
>>>>>> +
>>>>>> +       cpus_stopped = 1;
>>>>>> +
>>>>>> +        /* Excluding the panic cpu */
>>>>>> +       ncpus = num_online_cpus() - 1;
>>>>>> +
>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>> +       smp_wmb();
>>>>>> +
>>>>>> +       /*
>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>>>> +        * respond. Delay of at least 10 seconds.
>>>>>> +        */
>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>>>> +       timeout = USEC_PER_SEC * 10;
>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>> +               cpu_relax();
>>>>>> +               udelay(1);
>>>>>> +       }
>>>>>> +}
>>>>>> +
>>>>>> +#endif
>>>>>> +
>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>> +{
>>>>>> +       int crashing_cpu;
>>>>>> +
>>>>>> +       local_irq_disable();
>>>>>> +
>>>>>> +       crashing_cpu = smp_processor_id();
>>>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>>>> +
>>>>>> +       /* shutdown non-crashing cpus */
>>>>>> +       crash_smp_send_stop();
>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>> +
>>>>>> +       pr_info("Starting crashdump kernel...\n");
>>>>>> +}
>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..13e5d2f7870d
>>>>>> --- /dev/null
>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>> @@ -0,0 +1,19 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +#include <linux/highmem.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> +#include <linux/io.h>
>>>>>> +
>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>> +                        size_t csize, unsigned long offset)
>>>>>> +{
>>>>>> +       void  *vaddr;
>>>>>> +
>>>>>> +       if (!csize)
>>>>>> +               return 0;
>>>>>> +
>>>>>> +       vaddr = kmap_local_pfn(pfn);
>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>> +       kunmap_local(vaddr);
>>>>>> +
>>>>>> +       return csize;
>>>>>> +}
>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>                 continue;
>>>>>>         }
>>>>>>
>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>
>>>>>>         reboot_code_buffer =
>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>
>>>>>>         kexec_reboot();
>>>>>>  }
>>>>>> -
>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>> -{
>>>>>> -}
>>>>>>  #endif
>>>>>>
>>>>>>  void machine_shutdown(void)
>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>
>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>
>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>
>>>>>>         /*
>>>>>>          * The generic kexec code builds a page list with physical
>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>
>>>>>>         /*
>>>>>>          * We know we were online, and there will be no incoming IPIs at
>>>>>> -        * this point.
>>>>>> +        * this point. Mark online again before rebooting so that the crash
>>>>>> +        * analysis tool will see us correctly.
>>>>>>          */
>>>>>>         set_cpu_online(smp_processor_id(), true);
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>> @@ -5,6 +5,7 @@
>>>>>>  #include <linux/efi.h>
>>>>>>  #include <linux/initrd.h>
>>>>>>  #include <linux/memblock.h>
>>>>>> +#include <linux/of_fdt.h>
>>>>>>
>>>>>>  #include <asm/bootinfo.h>
>>>>>>  #include <asm/loongson.h>
>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>
>>>>>>         /* Reserve the initrd */
>>>>>>         reserve_initrd_mem();
>>>>>> +
>>>>>> +       /* Mainly reserved memory for the elf core head */
>>>>>> +       early_init_fdt_scan_reserved_mem();
>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>>>  }
>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>         move            s2, a2
>>>>>>         move            s3, a3
>>>>>>
>>>>>> +       /*
>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>>>> +        * populated as the kernel is directly copied to a reserved location
>>>>>> +        */
>>>>>> +       beqz            s2, done
>>>>>> +
>>>>>>  process_entry:
>>>>>>         PTR_L           s4, s2, 0
>>>>>>         PTR_ADDI        s2, s2, SZREG
>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>> @@ -19,6 +19,8 @@
>>>>>>  #include <linux/memblock.h>
>>>>>>  #include <linux/initrd.h>
>>>>>>  #include <linux/ioport.h>
>>>>>> +#include <linux/kexec.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>>  #include <linux/root_dev.h>
>>>>>>  #include <linux/console.h>
>>>>>>  #include <linux/pfn.h>
>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>  }
>>>>>>  early_param("mem", early_parse_mem);
>>>>>>
>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>> +{
>>>>>> +#ifdef CONFIG_KEXEC
>>>>>> +       unsigned long long start;
>>>>>> +       unsigned long long total_mem;
>>>>>> +       unsigned long long crash_size, crash_base;
>>>>>> +       int ret;
>>>>>> +
>>>>>> +       total_mem = memblock_phys_mem_size();
>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>> +                               &crash_size, &crash_base);
>>>>>> +       if (ret != 0 || crash_size <= 0)
>>>>>> +               return;
>>>>>> +
>>>>>> +
>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>> +                                       crash_base + crash_size);
>>>>>> +       if (start != crash_base) {
>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>> +               return;
>>>>>> +       }
>>>>>> +
>>>>>> +       crashk_res.start = crash_base;
>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>>>> +#endif
>>>>>> +}
>>>>>> +
>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>> +{
>>>>>> +#ifdef CONFIG_KEXEC
>>>>>> +       int ret;
>>>>>> +
>>>>>> +       if (crashk_res.start == crashk_res.end)
>>>>>> +               return;
>>>>>> +
>>>>>> +       ret = request_resource(res, &crashk_res);
>>>>>> +       if (!ret)
>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>> +                       (unsigned long)((crashk_res.end -
>>>>>> +                                        crashk_res.start + 1) >> 20),
>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>>>> +#endif
>>>>>> +}
>>>>>> +
>>>>>>  void __init platform_init(void)
>>>>>>  {
>>>>>>         efi_init();
>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>
>>>>>>         check_kernel_sections_mem();
>>>>>>
>>>>>> +       loongarch_parse_crashkernel();
>>>>>> +
>>>>>>         /*
>>>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>                 request_resource(res, &code_resource);
>>>>>>                 request_resource(res, &data_resource);
>>>>>>                 request_resource(res, &bss_resource);
>>>>>> +               request_crashkernel(res);
>>>>>>         }
>>>>>>  }
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>> @@ -10,6 +10,7 @@
>>>>>>  #include <linux/entry-common.h>
>>>>>>  #include <linux/init.h>
>>>>>>  #include <linux/kernel.h>
>>>>>> +#include <linux/kexec.h>
>>>>>>  #include <linux/module.h>
>>>>>>  #include <linux/extable.h>
>>>>>>  #include <linux/mm.h>
>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>
>>>>>>         oops_exit();
>>>>>>
>>>>>> +       if (regs && kexec_should_crash(current))
>>>>>> +               crash_kexec(regs);
>>>>>> +
>>>>>>         if (in_interrupt())
>>>>>>                 panic("Fatal exception in interrupt");
>>>>>>
>>>>>> --
>>>>>> 2.36.0
>>>>>>
>>>>
>>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  2:21             ` Youling Tang
@ 2022-09-05  7:32               ` Huacai Chen
  2022-09-05  7:45                 ` Youling Tang
  0 siblings, 1 reply; 20+ messages in thread
From: Huacai Chen @ 2022-09-05  7:32 UTC (permalink / raw)
  To: Youling Tang
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Youling,

On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
>
>
> On 09/05/2022 10:14 AM, Huacai Chen wrote:
> > On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> Hi, Huacai
> >>
> >> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> >>> Hi, Youling,
> >>>
> >>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> Hi, Huacai
> >>>>
> >>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>>>> Hi, Youling,
> >>>>>
> >>>>> I think crash.c can be merged into crash_dump.c
> >>>>
> >>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >>>> I'm not sure if merging crash.c into crash_dump.c will break its
> >>>> consistency?
> >>>>
> >>>> Thanks,
> >>>> Youling
> >>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> >>> can be merged into machine_kexec.c, as arm64 and riscv do.
> >>
> >> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> >> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> >> into machine_kexec.c, should crash_shutdown_secondary and
> >> crash_smp_send_stop be placed in smp.c?
> > I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>
> Ok, I'll merge all into machine_kexec.c.
>
> Youling.
Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
If you want to skip the "low memory", maybe we can use
0x9000000090000000 or 0x90000000a0000000?

Huacai
>
> >
> > Huacai
> >>
> >> Youling.
> >>>
> >>> Huacai
> >>>>
> >>>>>
> >>>>> Huacai
> >>>>>
> >>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>
> >>>>>> This patch adds support for kdump, the kernel will reserve a region
> >>>>>> for the crash kernel and jump there on panic.
> >>>>>>
> >>>>>> Arch-specific functions are added to allow for implementing a crash
> >>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>>>>>
> >>>>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>>>> separate region for the core's ELF header within crash kdump kernel
> >>>>>> memory and filling it in when executing kexec_load().
> >>>>>>
> >>>>>> Then, its location will be advertised to crash dump kernel via a new
> >>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>>>
> >>>>>> At the same time, it will also limit the crash kdump kernel to the
> >>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>>>> so as not to destroy the original kernel dump data.
> >>>>>>
> >>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>>>> with copy_oldmem_page().
> >>>>>>
> >>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> >>>>>> it by triggering a crash through /proc/sysrq-trigger:
> >>>>>>
> >>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>>>  # echo c > /proc/sysrq-trigger
> >>>>>>
> >>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>>>> ---
> >>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
> >>>>>>  arch/loongarch/Makefile                 |   4 +
> >>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
> >>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
> >>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>>>>>  arch/loongarch/kernel/traps.c           |   4 +
> >>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
> >>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>>>> --- a/arch/loongarch/Kconfig
> >>>>>> +++ b/arch/loongarch/Kconfig
> >>>>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>>>
> >>>>>>           The name comes from the similarity to the exec system call.
> >>>>>>
> >>>>>> +config CRASH_DUMP
> >>>>>> +       bool "Build kdump crash kernel"
> >>>>>> +       help
> >>>>>> +         Generate crash dump after being started by kexec. This should
> >>>>>> +         be normally only set in special crash dump kernels which are
> >>>>>> +         loaded in the main kernel with kexec-tools into a specially
> >>>>>> +         reserved region and then later executed after a crash by
> >>>>>> +         kdump/kexec.
> >>>>>> +
> >>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>>>> +
> >>>>>> +config PHYSICAL_START
> >>>>>> +       hex "Physical address where the kernel is loaded"
> >>>>>> +       default "0x9000000091000000" if 64BIT
> >>>>>> +       depends on CRASH_DUMP
> >>>>>> +       help
> >>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
> >>>>>> +         If you plan to use kernel for capturing the crash dump change
> >>>>>> +         this value to start of the reserved region (the "X" value as
> >>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >>>>>> +         passed to the panic-ed kernel).
> >>>>>> +
> >>>>>>  config SECCOMP
> >>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>>>         depends on PROC_FS
> >>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>>>> --- a/arch/loongarch/Makefile
> >>>>>> +++ b/arch/loongarch/Makefile
> >>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>>>  cflags-y += -ffreestanding
> >>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>>>
> >>>>>> +ifdef CONFIG_PHYSICAL_START
> >>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
> >>>>>> +else
> >>>>>>  load-y         = 0x9000000000200000
> >>>>>> +endif
> >>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>>>
> >>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>>>> index 20b64ac3f128..df5aea129364 100644
> >>>>>> --- a/arch/loongarch/kernel/Makefile
> >>>>>> +++ b/arch/loongarch/kernel/Makefile
> >>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>>>>>
> >>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>>>>>
> >>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..b4f249ec6301
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/loongarch/kernel/crash.c
> >>>>>> @@ -0,0 +1,100 @@
> >>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>> +/*
> >>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>>>> + *
> >>>>>> + * Derived from MIPS
> >>>>>> + */
> >>>>>> +#include <linux/kernel.h>
> >>>>>> +#include <linux/smp.h>
> >>>>>> +#include <linux/reboot.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> +#include <linux/delay.h>
> >>>>>> +#include <linux/irq.h>
> >>>>>> +#include <linux/types.h>
> >>>>>> +#include <linux/sched.h>
> >>>>>> +#include <linux/sched/task_stack.h>
> >>>>>> +#include <asm/cacheflush.h>
> >>>>>> +#include <asm/kexec.h>
> >>>>>> +
> >>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>>>> +
> >>>>>> +#ifdef CONFIG_SMP
> >>>>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>>>> +{
> >>>>>> +       struct pt_regs *regs = passed_regs;
> >>>>>> +       int cpu = smp_processor_id();
> >>>>>> +
> >>>>>> +       /*
> >>>>>> +        * If we are passed registers, use those.  Otherwise get the
> >>>>>> +        * regs from the last interrupt, which should be correct, as
> >>>>>> +        * we are in an interrupt.  But if the regs are not there,
> >>>>>> +        * pull them from the top of the stack.  They are probably
> >>>>>> +        * wrong, but we need something to keep from crashing again.
> >>>>>> +        */
> >>>>>> +       if (!regs)
> >>>>>> +               regs = get_irq_regs();
> >>>>>> +       if (!regs)
> >>>>>> +               regs = task_pt_regs(current);
> >>>>>> +
> >>>>>> +       local_irq_disable();
> >>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>>>> +               crash_save_cpu(regs, cpu);
> >>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>>>> +
> >>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
> >>>>>> +               cpu_relax();
> >>>>>> +
> >>>>>> +       kexec_reboot();
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Override the weak function in kernel/panic.c */
> >>>>>> +void crash_smp_send_stop(void)
> >>>>>> +{
> >>>>>> +       static int cpus_stopped;
> >>>>>> +       unsigned long timeout;
> >>>>>> +       unsigned int ncpus;
> >>>>>> +
> >>>>>> +       /*
> >>>>>> +        * This function can be called twice in panic path, but obviously
> >>>>>> +        * we execute this only once.
> >>>>>> +        */
> >>>>>> +       if (cpus_stopped)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +       cpus_stopped = 1;
> >>>>>> +
> >>>>>> +        /* Excluding the panic cpu */
> >>>>>> +       ncpus = num_online_cpus() - 1;
> >>>>>> +
> >>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>>>> +       smp_wmb();
> >>>>>> +
> >>>>>> +       /*
> >>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
> >>>>>> +        * respond. Delay of at least 10 seconds.
> >>>>>> +        */
> >>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
> >>>>>> +       timeout = USEC_PER_SEC * 10;
> >>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>>>> +               cpu_relax();
> >>>>>> +               udelay(1);
> >>>>>> +       }
> >>>>>> +}
> >>>>>> +
> >>>>>> +#endif
> >>>>>> +
> >>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>> +{
> >>>>>> +       int crashing_cpu;
> >>>>>> +
> >>>>>> +       local_irq_disable();
> >>>>>> +
> >>>>>> +       crashing_cpu = smp_processor_id();
> >>>>>> +       crash_save_cpu(regs, crashing_cpu);
> >>>>>> +
> >>>>>> +       /* shutdown non-crashing cpus */
> >>>>>> +       crash_smp_send_stop();
> >>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>>>> +
> >>>>>> +       pr_info("Starting crashdump kernel...\n");
> >>>>>> +}
> >>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..13e5d2f7870d
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>>>> @@ -0,0 +1,19 @@
> >>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>> +#include <linux/highmem.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> +#include <linux/io.h>
> >>>>>> +
> >>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>>>> +                        size_t csize, unsigned long offset)
> >>>>>> +{
> >>>>>> +       void  *vaddr;
> >>>>>> +
> >>>>>> +       if (!csize)
> >>>>>> +               return 0;
> >>>>>> +
> >>>>>> +       vaddr = kmap_local_pfn(pfn);
> >>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>>>> +       kunmap_local(vaddr);
> >>>>>> +
> >>>>>> +       return csize;
> >>>>>> +}
> >>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>>>                 continue;
> >>>>>>         }
> >>>>>>
> >>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
> >>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>>>
> >>>>>>         reboot_code_buffer =
> >>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>>>
> >>>>>>         kexec_reboot();
> >>>>>>  }
> >>>>>> -
> >>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>> -{
> >>>>>> -}
> >>>>>>  #endif
> >>>>>>
> >>>>>>  void machine_shutdown(void)
> >>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>
> >>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>>>
> >>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>
> >>>>>>         /*
> >>>>>>          * The generic kexec code builds a page list with physical
> >>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>
> >>>>>>         /*
> >>>>>>          * We know we were online, and there will be no incoming IPIs at
> >>>>>> -        * this point.
> >>>>>> +        * this point. Mark online again before rebooting so that the crash
> >>>>>> +        * analysis tool will see us correctly.
> >>>>>>          */
> >>>>>>         set_cpu_online(smp_processor_id(), true);
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>>>> --- a/arch/loongarch/kernel/mem.c
> >>>>>> +++ b/arch/loongarch/kernel/mem.c
> >>>>>> @@ -5,6 +5,7 @@
> >>>>>>  #include <linux/efi.h>
> >>>>>>  #include <linux/initrd.h>
> >>>>>>  #include <linux/memblock.h>
> >>>>>> +#include <linux/of_fdt.h>
> >>>>>>
> >>>>>>  #include <asm/bootinfo.h>
> >>>>>>  #include <asm/loongson.h>
> >>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>>>
> >>>>>>         /* Reserve the initrd */
> >>>>>>         reserve_initrd_mem();
> >>>>>> +
> >>>>>> +       /* Mainly reserved memory for the elf core head */
> >>>>>> +       early_init_fdt_scan_reserved_mem();
> >>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>>>> +       early_init_dt_check_for_usable_mem_range();
> >>>>>>  }
> >>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>>>         move            s2, a2
> >>>>>>         move            s3, a3
> >>>>>>
> >>>>>> +       /*
> >>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
> >>>>>> +        * populated as the kernel is directly copied to a reserved location
> >>>>>> +        */
> >>>>>> +       beqz            s2, done
> >>>>>> +
> >>>>>>  process_entry:
> >>>>>>         PTR_L           s4, s2, 0
> >>>>>>         PTR_ADDI        s2, s2, SZREG
> >>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>>>> index f938aae3e92c..ea34b77e402f 100644
> >>>>>> --- a/arch/loongarch/kernel/setup.c
> >>>>>> +++ b/arch/loongarch/kernel/setup.c
> >>>>>> @@ -19,6 +19,8 @@
> >>>>>>  #include <linux/memblock.h>
> >>>>>>  #include <linux/initrd.h>
> >>>>>>  #include <linux/ioport.h>
> >>>>>> +#include <linux/kexec.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>>  #include <linux/root_dev.h>
> >>>>>>  #include <linux/console.h>
> >>>>>>  #include <linux/pfn.h>
> >>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>>>  }
> >>>>>>  early_param("mem", early_parse_mem);
> >>>>>>
> >>>>>> +static void __init loongarch_parse_crashkernel(void)
> >>>>>> +{
> >>>>>> +#ifdef CONFIG_KEXEC
> >>>>>> +       unsigned long long start;
> >>>>>> +       unsigned long long total_mem;
> >>>>>> +       unsigned long long crash_size, crash_base;
> >>>>>> +       int ret;
> >>>>>> +
> >>>>>> +       total_mem = memblock_phys_mem_size();
> >>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >>>>>> +                               &crash_size, &crash_base);
> >>>>>> +       if (ret != 0 || crash_size <= 0)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +
> >>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>>>> +                                       crash_base + crash_size);
> >>>>>> +       if (start != crash_base) {
> >>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>>>> +               return;
> >>>>>> +       }
> >>>>>> +
> >>>>>> +       crashk_res.start = crash_base;
> >>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
> >>>>>> +#endif
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void __init request_crashkernel(struct resource *res)
> >>>>>> +{
> >>>>>> +#ifdef CONFIG_KEXEC
> >>>>>> +       int ret;
> >>>>>> +
> >>>>>> +       if (crashk_res.start == crashk_res.end)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +       ret = request_resource(res, &crashk_res);
> >>>>>> +       if (!ret)
> >>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>>>> +                       (unsigned long)((crashk_res.end -
> >>>>>> +                                        crashk_res.start + 1) >> 20),
> >>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
> >>>>>> +#endif
> >>>>>> +}
> >>>>>> +
> >>>>>>  void __init platform_init(void)
> >>>>>>  {
> >>>>>>         efi_init();
> >>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>>>
> >>>>>>         check_kernel_sections_mem();
> >>>>>>
> >>>>>> +       loongarch_parse_crashkernel();
> >>>>>> +
> >>>>>>         /*
> >>>>>>          * In order to reduce the possibility of kernel panic when failed to
> >>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>>>                 request_resource(res, &code_resource);
> >>>>>>                 request_resource(res, &data_resource);
> >>>>>>                 request_resource(res, &bss_resource);
> >>>>>> +               request_crashkernel(res);
> >>>>>>         }
> >>>>>>  }
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>>>> --- a/arch/loongarch/kernel/traps.c
> >>>>>> +++ b/arch/loongarch/kernel/traps.c
> >>>>>> @@ -10,6 +10,7 @@
> >>>>>>  #include <linux/entry-common.h>
> >>>>>>  #include <linux/init.h>
> >>>>>>  #include <linux/kernel.h>
> >>>>>> +#include <linux/kexec.h>
> >>>>>>  #include <linux/module.h>
> >>>>>>  #include <linux/extable.h>
> >>>>>>  #include <linux/mm.h>
> >>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>>>
> >>>>>>         oops_exit();
> >>>>>>
> >>>>>> +       if (regs && kexec_should_crash(current))
> >>>>>> +               crash_kexec(regs);
> >>>>>> +
> >>>>>>         if (in_interrupt())
> >>>>>>                 panic("Fatal exception in interrupt");
> >>>>>>
> >>>>>> --
> >>>>>> 2.36.0
> >>>>>>
> >>>>
> >>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  7:32               ` Huacai Chen
@ 2022-09-05  7:45                 ` Youling Tang
  2022-09-05 13:01                   ` Huacai Chen
  0 siblings, 1 reply; 20+ messages in thread
From: Youling Tang @ 2022-09-05  7:45 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Huacai

On 09/05/2022 03:32 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>>
>>
>> On 09/05/2022 10:14 AM, Huacai Chen wrote:
>>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> Hi, Huacai
>>>>
>>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>>>> Hi, Youling,
>>>>>
>>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> Hi, Huacai
>>>>>>
>>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>>>> Hi, Youling,
>>>>>>>
>>>>>>> I think crash.c can be merged into crash_dump.c
>>>>>>
>>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>>>> consistency?
>>>>>>
>>>>>> Thanks,
>>>>>> Youling
>>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>>>
>>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>>>> into machine_kexec.c, should crash_shutdown_secondary and
>>>> crash_smp_send_stop be placed in smp.c?
>>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>>
>> Ok, I'll merge all into machine_kexec.c.
>>
>> Youling.
> Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
> If you want to skip the "low memory", maybe we can use
> 0x9000000090000000 or 0x90000000a0000000?

Because there are many holes in our memory layout, if PHYSICAL_START is
set to 0x90000000a0000000, the largest reserved area of the crashkernel
will be 512M, beyond which it will fail.

# cat /proc/iomem
90400000-bfffffff : System RAM
c0020000-f9efffff : System RAM
   f6810000-f6813fff : Reserved

The second System RAM region starts at 0x90400000, so 0x9000000090000000
is too low (it lies below the start of that region).

Youling.

>
> Huacai
>>
>>>
>>> Huacai
>>>>
>>>> Youling.
>>>>>
>>>>> Huacai
>>>>>>
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>>>> for the crash kernel and jump there on panic.
>>>>>>>>
>>>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>>>>>>>>
>>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>>>> memory and filling it in when executing kexec_load().
>>>>>>>>
>>>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>>>
>>>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>>>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
>>>>>>>> so as not to destroy the original kernel dump data.
>>>>>>>>
>>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>>>> with copy_oldmem_page().
>>>>>>>>
>>>>>>>> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
>>>>>>>> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
>>>>>>>> triggering a crash through /proc/sysrq_trigger:
>>>>>>>>
>>>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>>>  # echo c > /proc/sysrq_trigger
>>>>>>>>
>>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>>>> ---
>>>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>>>>>  arch/loongarch/Makefile                 |   4 +
>>>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>>>> --- a/arch/loongarch/Kconfig
>>>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>>>
>>>>>>>>           The name comes from the similarity to the exec system call.
>>>>>>>>
>>>>>>>> +config CRASH_DUMP
>>>>>>>> +       bool "Build kdump crash kernel"
>>>>>>>> +       help
>>>>>>>> +         Generate crash dump after being started by kexec. This should
>>>>>>>> +         be normally only set in special crash dump kernels which are
>>>>>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>>>>>> +         reserved region and then later executed after a crash by
>>>>>>>> +         kdump/kexec.
>>>>>>>> +
>>>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>>>> +
>>>>>>>> +config PHYSICAL_START
>>>>>>>> +       hex "Physical address where the kernel is loaded"
>>>>>>>> +       default "0x9000000091000000" if 64BIT
>>>>>>>> +       depends on CRASH_DUMP
>>>>>>>> +       help
>>>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>>>>>> +         If you plan to use kernel for capturing the crash dump change
>>>>>>>> +         this value to start of the reserved region (the "X" value as
>>>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>>>> +         passed to the panic-ed kernel).
>>>>>>>> +
>>>>>>>>  config SECCOMP
>>>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>>>         depends on PROC_FS
>>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>>>> --- a/arch/loongarch/Makefile
>>>>>>>> +++ b/arch/loongarch/Makefile
>>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>>>  cflags-y += -ffreestanding
>>>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>>>
>>>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>>>>>> +else
>>>>>>>>  load-y         = 0x9000000000200000
>>>>>>>> +endif
>>>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>>>
>>>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>>>>>
>>>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>>>>>
>>>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..b4f249ec6301
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>> +/*
>>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>>>> + *
>>>>>>>> + * Derived from MIPS
>>>>>>>> + */
>>>>>>>> +#include <linux/kernel.h>
>>>>>>>> +#include <linux/smp.h>
>>>>>>>> +#include <linux/reboot.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> +#include <linux/delay.h>
>>>>>>>> +#include <linux/irq.h>
>>>>>>>> +#include <linux/types.h>
>>>>>>>> +#include <linux/sched.h>
>>>>>>>> +#include <linux/sched/task_stack.h>
>>>>>>>> +#include <asm/cacheflush.h>
>>>>>>>> +#include <asm/kexec.h>
>>>>>>>> +
>>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_SMP
>>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>>>> +{
>>>>>>>> +       struct pt_regs *regs = passed_regs;
>>>>>>>> +       int cpu = smp_processor_id();
>>>>>>>> +
>>>>>>>> +       /*
>>>>>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>>>>>> +        * regs from the last interrupt, which should be correct, as
>>>>>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>>>>>> +        * pull them from the top of the stack.  They are probably
>>>>>>>> +        * wrong, but we need something to keep from crashing again.
>>>>>>>> +        */
>>>>>>>> +       if (!regs)
>>>>>>>> +               regs = get_irq_regs();
>>>>>>>> +       if (!regs)
>>>>>>>> +               regs = task_pt_regs(current);
>>>>>>>> +
>>>>>>>> +       local_irq_disable();
>>>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>>>> +               crash_save_cpu(regs, cpu);
>>>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>>>> +
>>>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>>>>>> +               cpu_relax();
>>>>>>>> +
>>>>>>>> +       kexec_reboot();
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>>>> +void crash_smp_send_stop(void)
>>>>>>>> +{
>>>>>>>> +       static int cpus_stopped;
>>>>>>>> +       unsigned long timeout;
>>>>>>>> +       unsigned int ncpus;
>>>>>>>> +
>>>>>>>> +       /*
>>>>>>>> +        * This function can be called twice in panic path, but obviously
>>>>>>>> +        * we execute this only once.
>>>>>>>> +        */
>>>>>>>> +       if (cpus_stopped)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +       cpus_stopped = 1;
>>>>>>>> +
>>>>>>>> +        /* Excluding the panic cpu */
>>>>>>>> +       ncpus = num_online_cpus() - 1;
>>>>>>>> +
>>>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>>>> +       smp_wmb();
>>>>>>>> +
>>>>>>>> +       /*
>>>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>>>>>> +        * respond. Delay of at least 10 seconds.
>>>>>>>> +        */
>>>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>>>>>> +       timeout = USEC_PER_SEC * 10;
>>>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>>>> +               cpu_relax();
>>>>>>>> +               udelay(1);
>>>>>>>> +       }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>> +{
>>>>>>>> +       int crashing_cpu;
>>>>>>>> +
>>>>>>>> +       local_irq_disable();
>>>>>>>> +
>>>>>>>> +       crashing_cpu = smp_processor_id();
>>>>>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>>>>>> +
>>>>>>>> +       /* shutdown non-crashing cpus */
>>>>>>>> +       crash_smp_send_stop();
>>>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>>>> +
>>>>>>>> +       pr_info("Starting crashdump kernel...\n");
>>>>>>>> +}
>>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..13e5d2f7870d
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>>>> @@ -0,0 +1,19 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>> +#include <linux/highmem.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> +#include <linux/io.h>
>>>>>>>> +
>>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>>>> +                        size_t csize, unsigned long offset)
>>>>>>>> +{
>>>>>>>> +       void  *vaddr;
>>>>>>>> +
>>>>>>>> +       if (!csize)
>>>>>>>> +               return 0;
>>>>>>>> +
>>>>>>>> +       vaddr = kmap_local_pfn(pfn);
>>>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>>>> +       kunmap_local(vaddr);
>>>>>>>> +
>>>>>>>> +       return csize;
>>>>>>>> +}
>>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>>>                 continue;
>>>>>>>>         }
>>>>>>>>
>>>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>>>
>>>>>>>>         reboot_code_buffer =
>>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>>>
>>>>>>>>         kexec_reboot();
>>>>>>>>  }
>>>>>>>> -
>>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>> -{
>>>>>>>> -}
>>>>>>>>  #endif
>>>>>>>>
>>>>>>>>  void machine_shutdown(void)
>>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>
>>>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>>>
>>>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>
>>>>>>>>         /*
>>>>>>>>          * The generic kexec code builds a page list with physical
>>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>
>>>>>>>>         /*
>>>>>>>>          * We know we were online, and there will be no incoming IPIs at
>>>>>>>> -        * this point.
>>>>>>>> +        * this point. Mark online again before rebooting so that the crash
>>>>>>>> +        * analysis tool will see us correctly.
>>>>>>>>          */
>>>>>>>>         set_cpu_online(smp_processor_id(), true);
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>>>> @@ -5,6 +5,7 @@
>>>>>>>>  #include <linux/efi.h>
>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>  #include <linux/memblock.h>
>>>>>>>> +#include <linux/of_fdt.h>
>>>>>>>>
>>>>>>>>  #include <asm/bootinfo.h>
>>>>>>>>  #include <asm/loongson.h>
>>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>>>
>>>>>>>>         /* Reserve the initrd */
>>>>>>>>         reserve_initrd_mem();
>>>>>>>> +
>>>>>>>> +       /* Mainly reserved memory for the elf core head */
>>>>>>>> +       early_init_fdt_scan_reserved_mem();
>>>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>>>>>  }
>>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>>>         move            s2, a2
>>>>>>>>         move            s3, a3
>>>>>>>>
>>>>>>>> +       /*
>>>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>>>>>> +        * populated as the kernel is directly copied to a reserved location
>>>>>>>> +        */
>>>>>>>> +       beqz            s2, done
>>>>>>>> +
>>>>>>>>  process_entry:
>>>>>>>>         PTR_L           s4, s2, 0
>>>>>>>>         PTR_ADDI        s2, s2, SZREG
>>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>>>> @@ -19,6 +19,8 @@
>>>>>>>>  #include <linux/memblock.h>
>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>  #include <linux/ioport.h>
>>>>>>>> +#include <linux/kexec.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>  #include <linux/root_dev.h>
>>>>>>>>  #include <linux/console.h>
>>>>>>>>  #include <linux/pfn.h>
>>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>>>  }
>>>>>>>>  early_param("mem", early_parse_mem);
>>>>>>>>
>>>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>> +       unsigned long long start;
>>>>>>>> +       unsigned long long total_mem;
>>>>>>>> +       unsigned long long crash_size, crash_base;
>>>>>>>> +       int ret;
>>>>>>>> +
>>>>>>>> +       total_mem = memblock_phys_mem_size();
>>>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>>>> +                               &crash_size, &crash_base);
>>>>>>>> +       if (ret != 0 || crash_size <= 0)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +
>>>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>>>> +                                       crash_base + crash_size);
>>>>>>>> +       if (start != crash_base) {
>>>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>>>> +               return;
>>>>>>>> +       }
>>>>>>>> +
>>>>>>>> +       crashk_res.start = crash_base;
>>>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>>>>>> +#endif
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>> +       int ret;
>>>>>>>> +
>>>>>>>> +       if (crashk_res.start == crashk_res.end)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +       ret = request_resource(res, &crashk_res);
>>>>>>>> +       if (!ret)
>>>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>>>> +                       (unsigned long)((crashk_res.end -
>>>>>>>> +                                        crashk_res.start + 1) >> 20),
>>>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>>>>>> +#endif
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  void __init platform_init(void)
>>>>>>>>  {
>>>>>>>>         efi_init();
>>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>>>
>>>>>>>>         check_kernel_sections_mem();
>>>>>>>>
>>>>>>>> +       loongarch_parse_crashkernel();
>>>>>>>> +
>>>>>>>>         /*
>>>>>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>>>                 request_resource(res, &code_resource);
>>>>>>>>                 request_resource(res, &data_resource);
>>>>>>>>                 request_resource(res, &bss_resource);
>>>>>>>> +               request_crashkernel(res);
>>>>>>>>         }
>>>>>>>>  }
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>  #include <linux/entry-common.h>
>>>>>>>>  #include <linux/init.h>
>>>>>>>>  #include <linux/kernel.h>
>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>  #include <linux/module.h>
>>>>>>>>  #include <linux/extable.h>
>>>>>>>>  #include <linux/mm.h>
>>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>>>
>>>>>>>>         oops_exit();
>>>>>>>>
>>>>>>>> +       if (regs && kexec_should_crash(current))
>>>>>>>> +               crash_kexec(regs);
>>>>>>>> +
>>>>>>>>         if (in_interrupt())
>>>>>>>>                 panic("Fatal exception in interrupt");
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.36.0
>>>>>>>>
>>>>>>
>>>>
>>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05  7:45                 ` Youling Tang
@ 2022-09-05 13:01                   ` Huacai Chen
  2022-09-05 13:07                     ` Youling Tang
  0 siblings, 1 reply; 20+ messages in thread
From: Huacai Chen @ 2022-09-05 13:01 UTC (permalink / raw)
  To: Youling Tang
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Youling,

On Mon, Sep 5, 2022 at 3:45 PM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/05/2022 03:32 PM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >>
> >>
> >> On 09/05/2022 10:14 AM, Huacai Chen wrote:
> >>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> Hi, Huacai
> >>>>
> >>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> >>>>> Hi, Youling,
> >>>>>
> >>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>
> >>>>>> Hi, Huacai
> >>>>>>
> >>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>>>>>> Hi, Youling,
> >>>>>>>
> >>>>>>> I think crash.c can be merged into crash_dump.c
> >>>>>>
> >>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
> >>>>>> consistency?
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Youling
> >>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> >>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
> >>>>
> >>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> >>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> >>>> into machine_kexec.c, should crash_shutdown_secondary and
> >>>> crash_smp_send_stop be placed in smp.c?
> >>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
> >>
> >> Ok, I'll merge all into machine_kexec.c.
> >>
> >> Youling.
> > Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
> > If you want to skip the "low memory", maybe we can use
> > 0x9000000090000000 or 0x90000000a0000000?
>
> Because there are many holes in our memory layout, if PHYSICAL_START is
> set to 0x90000000a0000000, the largest reserved area of the crashkernel
> will be 512M, beyond which it will fail.
Then 0x9000000090000000 is not suitable, but I think 512M is enough?
If so, let's use 0x90000000a0000000.

Huacai

Huacai
>
> # cat /proc/iomem
> 90400000-bfffffff : System RAM
> c0020000-f9efffff : System RAM
>    f6810000-f6813fff : Reserved
>
> The second System RAM starts at 0x90400000, so 0x9000000090000000 will
> be too small.
>
> Youling.
>
> >
> > Huacai
> >>
> >>>
> >>> Huacai
> >>>>
> >>>> Youling.
> >>>>>
> >>>>> Huacai
> >>>>>>
> >>>>>>>
> >>>>>>> Huacai
> >>>>>>>
> >>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>>>
> >>>>>>>> This patch adds support for kdump, the kernel will reserve a region
> >>>>>>>> for the crash kernel and jump there on panic.
> >>>>>>>>
> >>>>>>>> Arch-specific functions are added to allow for implementing a crash
> >>>>>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
> >>>>>>>>
> >>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>>>>>> separate region for the core's ELF header within crash kdump kernel
> >>>>>>>> memory and filling it in when executing kexec_load().
> >>>>>>>>
> >>>>>>>> Then, its location will be advertised to crash dump kernel via a new
> >>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>>>>>
> >>>>>>>> At the same time, it will also limit the crash kdump kernel to the
> >>>>>>>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
> >>>>>>>> so as not to destroy the original kernel dump data.
> >>>>>>>>
> >>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>>>>>> with copy_oldmem_page().
> >>>>>>>>
> >>>>>>>> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
> >>>>>>>> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
> >>>>>>>> triggering a crash through /proc/sysrq_trigger:
> >>>>>>>>
> >>>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>>>>>  # echo c > /proc/sysrq_trigger
> >>>>>>>>
> >>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>>>>>> ---
> >>>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
> >>>>>>>>  arch/loongarch/Makefile                 |   4 +
> >>>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
> >>>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
> >>>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
> >>>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
> >>>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>>>>>> --- a/arch/loongarch/Kconfig
> >>>>>>>> +++ b/arch/loongarch/Kconfig
> >>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>>>>>
> >>>>>>>>           The name comes from the similarity to the exec system call.
> >>>>>>>>
> >>>>>>>> +config CRASH_DUMP
> >>>>>>>> +       bool "Build kdump crash kernel"
> >>>>>>>> +       help
> >>>>>>>> +         Generate crash dump after being started by kexec. This should
> >>>>>>>> +         be normally only set in special crash dump kernels which are
> >>>>>>>> +         loaded in the main kernel with kexec-tools into a specially
> >>>>>>>> +         reserved region and then later executed after a crash by
> >>>>>>>> +         kdump/kexec.
> >>>>>>>> +
> >>>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>>>>>> +
> >>>>>>>> +config PHYSICAL_START
> >>>>>>>> +       hex "Physical address where the kernel is loaded"
> >>>>>>>> +       default "0x9000000091000000" if 64BIT
> >>>>>>>> +       depends on CRASH_DUMP
> >>>>>>>> +       help
> >>>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
> >>>>>>>> +         If you plan to use kernel for capturing the crash dump change
> >>>>>>>> +         this value to start of the reserved region (the "X" value as
> >>>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >>>>>>>> +         passed to the panic-ed kernel).
> >>>>>>>> +
> >>>>>>>>  config SECCOMP
> >>>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>>>>>         depends on PROC_FS
> >>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>>>>>> --- a/arch/loongarch/Makefile
> >>>>>>>> +++ b/arch/loongarch/Makefile
> >>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>>>>>  cflags-y += -ffreestanding
> >>>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>>>>>
> >>>>>>>> +ifdef CONFIG_PHYSICAL_START
> >>>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
> >>>>>>>> +else
> >>>>>>>>  load-y         = 0x9000000000200000
> >>>>>>>> +endif
> >>>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>>>>>
> >>>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>>>>>> index 20b64ac3f128..df5aea129364 100644
> >>>>>>>> --- a/arch/loongarch/kernel/Makefile
> >>>>>>>> +++ b/arch/loongarch/kernel/Makefile
> >>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>>>>>>>
> >>>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >>>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >>>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>>>>>>>
> >>>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..b4f249ec6301
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/loongarch/kernel/crash.c
> >>>>>>>> @@ -0,0 +1,100 @@
> >>>>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>>>> +/*
> >>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>>>>>> + *
> >>>>>>>> + * Derived from MIPS
> >>>>>>>> + */
> >>>>>>>> +#include <linux/kernel.h>
> >>>>>>>> +#include <linux/smp.h>
> >>>>>>>> +#include <linux/reboot.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> +#include <linux/delay.h>
> >>>>>>>> +#include <linux/irq.h>
> >>>>>>>> +#include <linux/types.h>
> >>>>>>>> +#include <linux/sched.h>
> >>>>>>>> +#include <linux/sched/task_stack.h>
> >>>>>>>> +#include <asm/cacheflush.h>
> >>>>>>>> +#include <asm/kexec.h>
> >>>>>>>> +
> >>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_SMP
> >>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>>>>>> +{
> >>>>>>>> +       struct pt_regs *regs = passed_regs;
> >>>>>>>> +       int cpu = smp_processor_id();
> >>>>>>>> +
> >>>>>>>> +       /*
> >>>>>>>> +        * If we are passed registers, use those.  Otherwise get the
> >>>>>>>> +        * regs from the last interrupt, which should be correct, as
> >>>>>>>> +        * we are in an interrupt.  But if the regs are not there,
> >>>>>>>> +        * pull them from the top of the stack.  They are probably
> >>>>>>>> +        * wrong, but we need something to keep from crashing again.
> >>>>>>>> +        */
> >>>>>>>> +       if (!regs)
> >>>>>>>> +               regs = get_irq_regs();
> >>>>>>>> +       if (!regs)
> >>>>>>>> +               regs = task_pt_regs(current);
> >>>>>>>> +
> >>>>>>>> +       local_irq_disable();
> >>>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>>>>>> +               crash_save_cpu(regs, cpu);
> >>>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>>>>>> +
> >>>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
> >>>>>>>> +               cpu_relax();
> >>>>>>>> +
> >>>>>>>> +       kexec_reboot();
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Override the weak function in kernel/panic.c */
> >>>>>>>> +void crash_smp_send_stop(void)
> >>>>>>>> +{
> >>>>>>>> +       static int cpus_stopped;
> >>>>>>>> +       unsigned long timeout;
> >>>>>>>> +       unsigned int ncpus;
> >>>>>>>> +
> >>>>>>>> +       /*
> >>>>>>>> +        * This function can be called twice in panic path, but obviously
> >>>>>>>> +        * we execute this only once.
> >>>>>>>> +        */
> >>>>>>>> +       if (cpus_stopped)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +       cpus_stopped = 1;
> >>>>>>>> +
> >>>>>>>> +        /* Excluding the panic cpu */
> >>>>>>>> +       ncpus = num_online_cpus() - 1;
> >>>>>>>> +
> >>>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>>>>>> +       smp_wmb();
> >>>>>>>> +
> >>>>>>>> +       /*
> >>>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
> >>>>>>>> +        * respond. Delay of at least 10 seconds.
> >>>>>>>> +        */
> >>>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
> >>>>>>>> +       timeout = USEC_PER_SEC * 10;
> >>>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>>>>>> +               cpu_relax();
> >>>>>>>> +               udelay(1);
> >>>>>>>> +       }
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>>>> +{
> >>>>>>>> +       int crashing_cpu;
> >>>>>>>> +
> >>>>>>>> +       local_irq_disable();
> >>>>>>>> +
> >>>>>>>> +       crashing_cpu = smp_processor_id();
> >>>>>>>> +       crash_save_cpu(regs, crashing_cpu);
> >>>>>>>> +
> >>>>>>>> +       /* shutdown non-crashing cpus */
> >>>>>>>> +       crash_smp_send_stop();
> >>>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>>>>>> +
> >>>>>>>> +       pr_info("Starting crashdump kernel...\n");
> >>>>>>>> +}
> >>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..13e5d2f7870d
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>>>>>> @@ -0,0 +1,19 @@
> >>>>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>>>> +#include <linux/highmem.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> +#include <linux/io.h>
> >>>>>>>> +
> >>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>>>>>> +                        size_t csize, unsigned long offset)
> >>>>>>>> +{
> >>>>>>>> +       void  *vaddr;
> >>>>>>>> +
> >>>>>>>> +       if (!csize)
> >>>>>>>> +               return 0;
> >>>>>>>> +
> >>>>>>>> +       vaddr = kmap_local_pfn(pfn);
> >>>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>>>>>> +       kunmap_local(vaddr);
> >>>>>>>> +
> >>>>>>>> +       return csize;
> >>>>>>>> +}
> >>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>>>>>                 continue;
> >>>>>>>>         }
> >>>>>>>>
> >>>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
> >>>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>>>>>
> >>>>>>>>         reboot_code_buffer =
> >>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>>>>>
> >>>>>>>>         kexec_reboot();
> >>>>>>>>  }
> >>>>>>>> -
> >>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>>>> -{
> >>>>>>>> -}
> >>>>>>>>  #endif
> >>>>>>>>
> >>>>>>>>  void machine_shutdown(void)
> >>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>>>
> >>>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>>>>>
> >>>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >>>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>>>
> >>>>>>>>         /*
> >>>>>>>>          * The generic kexec code builds a page list with physical
> >>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>>>
> >>>>>>>>         /*
> >>>>>>>>          * We know we were online, and there will be no incoming IPIs at
> >>>>>>>> -        * this point.
> >>>>>>>> +        * this point. Mark online again before rebooting so that the crash
> >>>>>>>> +        * analysis tool will see us correctly.
> >>>>>>>>          */
> >>>>>>>>         set_cpu_online(smp_processor_id(), true);
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>>>>>> --- a/arch/loongarch/kernel/mem.c
> >>>>>>>> +++ b/arch/loongarch/kernel/mem.c
> >>>>>>>> @@ -5,6 +5,7 @@
> >>>>>>>>  #include <linux/efi.h>
> >>>>>>>>  #include <linux/initrd.h>
> >>>>>>>>  #include <linux/memblock.h>
> >>>>>>>> +#include <linux/of_fdt.h>
> >>>>>>>>
> >>>>>>>>  #include <asm/bootinfo.h>
> >>>>>>>>  #include <asm/loongson.h>
> >>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>>>>>
> >>>>>>>>         /* Reserve the initrd */
> >>>>>>>>         reserve_initrd_mem();
> >>>>>>>> +
> >>>>>>>> +       /* Mainly reserved memory for the elf core head */
> >>>>>>>> +       early_init_fdt_scan_reserved_mem();
> >>>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>>>>>> +       early_init_dt_check_for_usable_mem_range();
> >>>>>>>>  }
> >>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>>>>>         move            s2, a2
> >>>>>>>>         move            s3, a3
> >>>>>>>>
> >>>>>>>> +       /*
> >>>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
> >>>>>>>> +        * populated as the kernel is directly copied to a reserved location
> >>>>>>>> +        */
> >>>>>>>> +       beqz            s2, done
> >>>>>>>> +
> >>>>>>>>  process_entry:
> >>>>>>>>         PTR_L           s4, s2, 0
> >>>>>>>>         PTR_ADDI        s2, s2, SZREG
> >>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>>>>>> index f938aae3e92c..ea34b77e402f 100644
> >>>>>>>> --- a/arch/loongarch/kernel/setup.c
> >>>>>>>> +++ b/arch/loongarch/kernel/setup.c
> >>>>>>>> @@ -19,6 +19,8 @@
> >>>>>>>>  #include <linux/memblock.h>
> >>>>>>>>  #include <linux/initrd.h>
> >>>>>>>>  #include <linux/ioport.h>
> >>>>>>>> +#include <linux/kexec.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>>  #include <linux/root_dev.h>
> >>>>>>>>  #include <linux/console.h>
> >>>>>>>>  #include <linux/pfn.h>
> >>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>>>>>  }
> >>>>>>>>  early_param("mem", early_parse_mem);
> >>>>>>>>
> >>>>>>>> +static void __init loongarch_parse_crashkernel(void)
> >>>>>>>> +{
> >>>>>>>> +#ifdef CONFIG_KEXEC
> >>>>>>>> +       unsigned long long start;
> >>>>>>>> +       unsigned long long total_mem;
> >>>>>>>> +       unsigned long long crash_size, crash_base;
> >>>>>>>> +       int ret;
> >>>>>>>> +
> >>>>>>>> +       total_mem = memblock_phys_mem_size();
> >>>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >>>>>>>> +                               &crash_size, &crash_base);
> >>>>>>>> +       if (ret != 0 || crash_size <= 0)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +
> >>>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>>>>>> +                                       crash_base + crash_size);
> >>>>>>>> +       if (start != crash_base) {
> >>>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>>>>>> +               return;
> >>>>>>>> +       }
> >>>>>>>> +
> >>>>>>>> +       crashk_res.start = crash_base;
> >>>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
> >>>>>>>> +#endif
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void __init request_crashkernel(struct resource *res)
> >>>>>>>> +{
> >>>>>>>> +#ifdef CONFIG_KEXEC
> >>>>>>>> +       int ret;
> >>>>>>>> +
> >>>>>>>> +       if (crashk_res.start == crashk_res.end)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +       ret = request_resource(res, &crashk_res);
> >>>>>>>> +       if (!ret)
> >>>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>>>>>> +                       (unsigned long)((crashk_res.end -
> >>>>>>>> +                                        crashk_res.start + 1) >> 20),
> >>>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
> >>>>>>>> +#endif
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  void __init platform_init(void)
> >>>>>>>>  {
> >>>>>>>>         efi_init();
> >>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>>>>>
> >>>>>>>>         check_kernel_sections_mem();
> >>>>>>>>
> >>>>>>>> +       loongarch_parse_crashkernel();
> >>>>>>>> +
> >>>>>>>>         /*
> >>>>>>>>          * In order to reduce the possibility of kernel panic when failed to
> >>>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>>>>>                 request_resource(res, &code_resource);
> >>>>>>>>                 request_resource(res, &data_resource);
> >>>>>>>>                 request_resource(res, &bss_resource);
> >>>>>>>> +               request_crashkernel(res);
> >>>>>>>>         }
> >>>>>>>>  }
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>>>>>> --- a/arch/loongarch/kernel/traps.c
> >>>>>>>> +++ b/arch/loongarch/kernel/traps.c
> >>>>>>>> @@ -10,6 +10,7 @@
> >>>>>>>>  #include <linux/entry-common.h>
> >>>>>>>>  #include <linux/init.h>
> >>>>>>>>  #include <linux/kernel.h>
> >>>>>>>> +#include <linux/kexec.h>
> >>>>>>>>  #include <linux/module.h>
> >>>>>>>>  #include <linux/extable.h>
> >>>>>>>>  #include <linux/mm.h>
> >>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>>>>>
> >>>>>>>>         oops_exit();
> >>>>>>>>
> >>>>>>>> +       if (regs && kexec_should_crash(current))
> >>>>>>>> +               crash_kexec(regs);
> >>>>>>>> +
> >>>>>>>>         if (in_interrupt())
> >>>>>>>>                 panic("Fatal exception in interrupt");
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.36.0
> >>>>>>>>
> >>>>>>
> >>>>
> >>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add kdump support
  2022-09-05 13:01                   ` Huacai Chen
@ 2022-09-05 13:07                     ` Youling Tang
  0 siblings, 0 replies; 20+ messages in thread
From: Youling Tang @ 2022-09-05 13:07 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Baoquan He, Eric Biederman, WANG Xuerui, Vivek Goyal, Dave Young,
	Guo Ren, Jiaxun Yang, kexec, loongarch, LKML

Hi, Huacai

On 09/05/2022 09:01 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 3:45 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/05/2022 03:32 PM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 09/05/2022 10:14 AM, Huacai Chen wrote:
>>>>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> Hi, Huacai
>>>>>>
>>>>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>>>>>> Hi, Youling,
>>>>>>>
>>>>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> Hi, Huacai
>>>>>>>>
>>>>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>>>>>> Hi, Youling,
>>>>>>>>>
>>>>>>>>> I think crash.c can be merged into crash_dump.c
>>>>>>>>
>>>>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>>>>>> consistency?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Youling
>>>>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>>>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>>>>>
>>>>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>>>>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>>>>>> into machine_kexec.c, should crash_shutdown_secondary and
>>>>>> crash_smp_send_stop be placed in smp.c?
>>>>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>>>>
>>>> Ok, I'll merge all into machine_kexec.c.
>>>>
>>>> Youling.
>>> Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
>>> If you want to skip the "low memory", maybe we can use
>>> 0x9000000090000000 or 0x90000000a0000000?
>>
>> Because there are many holes in our memory layout, if PHYSICAL_START is
>> set to 0x90000000a0000000, the largest reserved area of the crashkernel
>> will be 512M, beyond which it will fail.
> Then 0x9000000090000000 is not suitable, but I think 512M is enough?
> If so, let's use 0x90000000a0000000.

I'm not sure whether 512M is enough for server machines, but
I will change PHYSICAL_START to 0x90000000a0000000.

Youling.
>
> Huacai
>
> Huacai
>>
>> # cat /proc/iomem
>> 90400000-bfffffff : System RAM
>> c0020000-f9efffff : System RAM
>>    f6810000-f6813fff : Reserved
>>
>> The second System RAM starts at 0x90400000, so 0x9000000090000000 will
>> be too small.
>>
>> Youling.
>>
>>>
>>> Huacai
>>>>
>>>>>
>>>>> Huacai
>>>>>>
>>>>>> Youling.
>>>>>>>
>>>>>>> Huacai
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Huacai
>>>>>>>>>
>>>>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>>>
>>>>>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>>>>>> for the crash kernel and jump there on panic.
>>>>>>>>>>
>>>>>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>>>>>>>
>>>>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>>>>>> memory and filling it in when executing kexec_load().
>>>>>>>>>>
>>>>>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>>>>>
>>>>>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>>>>>> so as not to destroy the original kernel dump data.
>>>>>>>>>>
>>>>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>>>>>> with copy_oldmem_page().
>>>>>>>>>>
>>>>>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>>>>>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>>>>>>>
>>>>>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>>>>>  # echo c > /proc/sysrq-trigger
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>>>>>> ---
>>>>>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>>>>>>>  arch/loongarch/Makefile                 |   4 +
>>>>>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>>>>>> --- a/arch/loongarch/Kconfig
>>>>>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>>>>>
>>>>>>>>>>           The name comes from the similarity to the exec system call.
>>>>>>>>>>
>>>>>>>>>> +config CRASH_DUMP
>>>>>>>>>> +       bool "Build kdump crash kernel"
>>>>>>>>>> +       help
>>>>>>>>>> +         Generate crash dump after being started by kexec. This should
>>>>>>>>>> +         be normally only set in special crash dump kernels which are
>>>>>>>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>>>>>>>> +         reserved region and then later executed after a crash by
>>>>>>>>>> +         kdump/kexec.
>>>>>>>>>> +
>>>>>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>>>>>> +
>>>>>>>>>> +config PHYSICAL_START
>>>>>>>>>> +       hex "Physical address where the kernel is loaded"
>>>>>>>>>> +       default "0x9000000091000000" if 64BIT
>>>>>>>>>> +       depends on CRASH_DUMP
>>>>>>>>>> +       help
>>>>>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>>>>>>>> +         If you plan to use kernel for capturing the crash dump change
>>>>>>>>>> +         this value to start of the reserved region (the "X" value as
>>>>>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>>>>>> +         passed to the panic-ed kernel).
>>>>>>>>>> +
>>>>>>>>>>  config SECCOMP
>>>>>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>>>>>         depends on PROC_FS
>>>>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>>>>>> --- a/arch/loongarch/Makefile
>>>>>>>>>> +++ b/arch/loongarch/Makefile
>>>>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>>>>>  cflags-y += -ffreestanding
>>>>>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>>>>>
>>>>>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>>>>>>>> +else
>>>>>>>>>>  load-y         = 0x9000000000200000
>>>>>>>>>> +endif
>>>>>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>>>>>
>>>>>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>>>>>>>
>>>>>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>>>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>>>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>>>>>>>
>>>>>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..b4f249ec6301
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +/*
>>>>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>>>>>> + *
>>>>>>>>>> + * Derived from MIPS
>>>>>>>>>> + */
>>>>>>>>>> +#include <linux/kernel.h>
>>>>>>>>>> +#include <linux/smp.h>
>>>>>>>>>> +#include <linux/reboot.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> +#include <linux/delay.h>
>>>>>>>>>> +#include <linux/irq.h>
>>>>>>>>>> +#include <linux/types.h>
>>>>>>>>>> +#include <linux/sched.h>
>>>>>>>>>> +#include <linux/sched/task_stack.h>
>>>>>>>>>> +#include <asm/cacheflush.h>
>>>>>>>>>> +#include <asm/kexec.h>
>>>>>>>>>> +
>>>>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>>>>>> +
>>>>>>>>>> +#ifdef CONFIG_SMP
>>>>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>>>>>> +{
>>>>>>>>>> +       struct pt_regs *regs = passed_regs;
>>>>>>>>>> +       int cpu = smp_processor_id();
>>>>>>>>>> +
>>>>>>>>>> +       /*
>>>>>>>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>>>>>>>> +        * regs from the last interrupt, which should be correct, as
>>>>>>>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>>>>>>>> +        * pull them from the top of the stack.  They are probably
>>>>>>>>>> +        * wrong, but we need something to keep from crashing again.
>>>>>>>>>> +        */
>>>>>>>>>> +       if (!regs)
>>>>>>>>>> +               regs = get_irq_regs();
>>>>>>>>>> +       if (!regs)
>>>>>>>>>> +               regs = task_pt_regs(current);
>>>>>>>>>> +
>>>>>>>>>> +       local_irq_disable();
>>>>>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>>>>>> +               crash_save_cpu(regs, cpu);
>>>>>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>>>>>> +
>>>>>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>>>>>>>> +               cpu_relax();
>>>>>>>>>> +
>>>>>>>>>> +       kexec_reboot();
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>>>>>> +void crash_smp_send_stop(void)
>>>>>>>>>> +{
>>>>>>>>>> +       static int cpus_stopped;
>>>>>>>>>> +       unsigned long timeout;
>>>>>>>>>> +       unsigned int ncpus;
>>>>>>>>>> +
>>>>>>>>>> +       /*
>>>>>>>>>> +        * This function can be called twice in panic path, but obviously
>>>>>>>>>> +        * we execute this only once.
>>>>>>>>>> +        */
>>>>>>>>>> +       if (cpus_stopped)
>>>>>>>>>> +               return;
>>>>>>>>>> +
>>>>>>>>>> +       cpus_stopped = 1;
>>>>>>>>>> +
>>>>>>>>>> +        /* Excluding the panic cpu */
>>>>>>>>>> +       ncpus = num_online_cpus() - 1;
>>>>>>>>>> +
>>>>>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>>>>>> +       smp_wmb();
>>>>>>>>>> +
>>>>>>>>>> +       /*
>>>>>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>>>>>>>> +        * respond. Delay of at least 10 seconds.
>>>>>>>>>> +        */
>>>>>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>>>>>>>> +       timeout = USEC_PER_SEC * 10;
>>>>>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>>>>>> +               cpu_relax();
>>>>>>>>>> +               udelay(1);
>>>>>>>>>> +       }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +#endif
>>>>>>>>>> +
>>>>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>>>> +{
>>>>>>>>>> +       int crashing_cpu;
>>>>>>>>>> +
>>>>>>>>>> +       local_irq_disable();
>>>>>>>>>> +
>>>>>>>>>> +       crashing_cpu = smp_processor_id();
>>>>>>>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>>>>>>>> +
>>>>>>>>>> +       /* shutdown non-crashing cpus */
>>>>>>>>>> +       crash_smp_send_stop();
>>>>>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>>>>>> +
>>>>>>>>>> +       pr_info("Starting crashdump kernel...\n");
>>>>>>>>>> +}
>>>>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..13e5d2f7870d
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>>>>>> @@ -0,0 +1,19 @@
>>>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +#include <linux/highmem.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> +#include <linux/io.h>
>>>>>>>>>> +
>>>>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>>>>>> +                        size_t csize, unsigned long offset)
>>>>>>>>>> +{
>>>>>>>>>> +       void  *vaddr;
>>>>>>>>>> +
>>>>>>>>>> +       if (!csize)
>>>>>>>>>> +               return 0;
>>>>>>>>>> +
>>>>>>>>>> +       vaddr = kmap_local_pfn(pfn);
>>>>>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>>>>>> +       kunmap_local(vaddr);
>>>>>>>>>> +
>>>>>>>>>> +       return csize;
>>>>>>>>>> +}
>>>>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>>>>>                 continue;
>>>>>>>>>>         }
>>>>>>>>>>
>>>>>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>>>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>>>>>
>>>>>>>>>>         reboot_code_buffer =
>>>>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>>>>>
>>>>>>>>>>         kexec_reboot();
>>>>>>>>>>  }
>>>>>>>>>> -
>>>>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>>>> -{
>>>>>>>>>> -}
>>>>>>>>>>  #endif
>>>>>>>>>>
>>>>>>>>>>  void machine_shutdown(void)
>>>>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>>>
>>>>>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>>>>>
>>>>>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>>>
>>>>>>>>>>         /*
>>>>>>>>>>          * The generic kexec code builds a page list with physical
>>>>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>>>
>>>>>>>>>>         /*
>>>>>>>>>>          * We know we were online, and there will be no incoming IPIs at
>>>>>>>>>> -        * this point.
>>>>>>>>>> +        * this point. Mark online again before rebooting so that the crash
>>>>>>>>>> +        * analysis tool will see us correctly.
>>>>>>>>>>          */
>>>>>>>>>>         set_cpu_online(smp_processor_id(), true);
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>>>>>> @@ -5,6 +5,7 @@
>>>>>>>>>>  #include <linux/efi.h>
>>>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>>>  #include <linux/memblock.h>
>>>>>>>>>> +#include <linux/of_fdt.h>
>>>>>>>>>>
>>>>>>>>>>  #include <asm/bootinfo.h>
>>>>>>>>>>  #include <asm/loongson.h>
>>>>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>>>>>
>>>>>>>>>>         /* Reserve the initrd */
>>>>>>>>>>         reserve_initrd_mem();
>>>>>>>>>> +
>>>>>>>>>> +       /* Mainly reserved memory for the elf core head */
>>>>>>>>>> +       early_init_fdt_scan_reserved_mem();
>>>>>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>>>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>>>>>>>  }
>>>>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>>>>>         move            s2, a2
>>>>>>>>>>         move            s3, a3
>>>>>>>>>>
>>>>>>>>>> +       /*
>>>>>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>>>>>>>> +        * populated as the kernel is directly copied to a reserved location
>>>>>>>>>> +        */
>>>>>>>>>> +       beqz            s2, done
>>>>>>>>>> +
>>>>>>>>>>  process_entry:
>>>>>>>>>>         PTR_L           s4, s2, 0
>>>>>>>>>>         PTR_ADDI        s2, s2, SZREG
>>>>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>>>>>> @@ -19,6 +19,8 @@
>>>>>>>>>>  #include <linux/memblock.h>
>>>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>>>  #include <linux/ioport.h>
>>>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>>  #include <linux/root_dev.h>
>>>>>>>>>>  #include <linux/console.h>
>>>>>>>>>>  #include <linux/pfn.h>
>>>>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>>>>>  }
>>>>>>>>>>  early_param("mem", early_parse_mem);
>>>>>>>>>>
>>>>>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>>>>>> +{
>>>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>>>> +       unsigned long long start;
>>>>>>>>>> +       unsigned long long total_mem;
>>>>>>>>>> +       unsigned long long crash_size, crash_base;
>>>>>>>>>> +       int ret;
>>>>>>>>>> +
>>>>>>>>>> +       total_mem = memblock_phys_mem_size();
>>>>>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>>>>>> +                               &crash_size, &crash_base);
>>>>>>>>>> +       if (ret != 0 || crash_size <= 0)
>>>>>>>>>> +               return;
>>>>>>>>>> +
>>>>>>>>>> +
>>>>>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>>>>>> +                                       crash_base + crash_size);
>>>>>>>>>> +       if (start != crash_base) {
>>>>>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>>>>>> +               return;
>>>>>>>>>> +       }
>>>>>>>>>> +
>>>>>>>>>> +       crashk_res.start = crash_base;
>>>>>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>>>>>>>> +#endif
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>>>>>> +{
>>>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>>>> +       int ret;
>>>>>>>>>> +
>>>>>>>>>> +       if (crashk_res.start == crashk_res.end)
>>>>>>>>>> +               return;
>>>>>>>>>> +
>>>>>>>>>> +       ret = request_resource(res, &crashk_res);
>>>>>>>>>> +       if (!ret)
>>>>>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>>>>>> +                       (unsigned long)((crashk_res.end -
>>>>>>>>>> +                                        crashk_res.start + 1) >> 20),
>>>>>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>>>>>>>> +#endif
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  void __init platform_init(void)
>>>>>>>>>>  {
>>>>>>>>>>         efi_init();
>>>>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>>>>>
>>>>>>>>>>         check_kernel_sections_mem();
>>>>>>>>>>
>>>>>>>>>> +       loongarch_parse_crashkernel();
>>>>>>>>>> +
>>>>>>>>>>         /*
>>>>>>>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>>>>>                 request_resource(res, &code_resource);
>>>>>>>>>>                 request_resource(res, &data_resource);
>>>>>>>>>>                 request_resource(res, &bss_resource);
>>>>>>>>>> +               request_crashkernel(res);
>>>>>>>>>>         }
>>>>>>>>>>  }
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>>>  #include <linux/entry-common.h>
>>>>>>>>>>  #include <linux/init.h>
>>>>>>>>>>  #include <linux/kernel.h>
>>>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>>>  #include <linux/module.h>
>>>>>>>>>>  #include <linux/extable.h>
>>>>>>>>>>  #include <linux/mm.h>
>>>>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>>>>>
>>>>>>>>>>         oops_exit();
>>>>>>>>>>
>>>>>>>>>> +       if (regs && kexec_should_crash(current))
>>>>>>>>>> +               crash_kexec(regs);
>>>>>>>>>> +
>>>>>>>>>>         if (in_interrupt())
>>>>>>>>>>                 panic("Fatal exception in interrupt");
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> 2.36.0
>>>>>>>>>>
>>>>>>>>
>>>>>>
>>>>
>>


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2022-09-05 13:07 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-29  4:37 [PATCH 0/3] LoongArch: Add kexec/kdump support Youling Tang
2022-08-29  4:37 ` [PATCH 1/3] LoongArch: Add kexec support Youling Tang
2022-08-30  1:53   ` Jinyang He
2022-08-30  3:25     ` Youling Tang
2022-08-30  3:42       ` Huacai Chen
2022-08-30  4:07         ` Jinyang He
2022-08-31  3:38       ` Youling Tang
2022-09-05  1:01   ` Youling Tang
2022-08-29  4:37 ` [PATCH 2/3] LoongArch: Add kdump support Youling Tang
2022-09-04 12:21   ` Huacai Chen
2022-09-05  0:54     ` Youling Tang
2022-09-05  1:38       ` Huacai Chen
2022-09-05  2:04         ` Youling Tang
2022-09-05  2:14           ` Huacai Chen
2022-09-05  2:21             ` Youling Tang
2022-09-05  7:32               ` Huacai Chen
2022-09-05  7:45                 ` Youling Tang
2022-09-05 13:01                   ` Huacai Chen
2022-09-05 13:07                     ` Youling Tang
2022-08-29  4:37 ` [PATCH 3/3] LoongArch: Enable CONFIG_KEXEC Youling Tang

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).