linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
@ 2017-04-20 11:39 Xunlei Pang
  2017-04-20 11:39 ` [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Xunlei Pang @ 2017-04-20 11:39 UTC (permalink / raw)
  To: linux-kernel, kexec
  Cc: akpm, Eric Biederman, Dave Young, Baoquan He, Petr Tesarik,
	Hari Bathini, Xunlei Pang, Michael Holzheu, Juergen Gross

As Eric said,
"what we need to do is move the variable vmcoreinfo_note out
of the kernel's .bss section.  And modify the code to regenerate
and keep this information in something like the control page.

Definitely something like this needs a page all to itself, and ideally
far away from any other kernel data structures.  I clearly was not
watching closely the data someone decided to keep this silly thing
in the kernel's .bss section."

This patch allocates extra pages for these vmcoreinfo_XXX variables,
one advantage is that it enhances some safety of vmcoreinfo, because
vmcoreinfo now is kept far away from other kernel data structures.

Suggested-by: Eric Biederman <ebiederm@xmission.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
---
v3->v4:
-Rebased on the latest linux-next
-Handle S390 vmcoreinfo_note properly
-Handle the newly-added xen/mmu_pv.c

 arch/ia64/kernel/machine_kexec.c |  5 -----
 arch/s390/kernel/machine_kexec.c |  1 +
 arch/s390/kernel/setup.c         |  6 ------
 arch/x86/kernel/crash.c          |  2 +-
 arch/x86/xen/mmu_pv.c            |  4 ++--
 include/linux/crash_core.h       |  2 +-
 kernel/crash_core.c              | 27 +++++++++++++++++++++++----
 kernel/ksysfs.c                  |  2 +-
 8 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 599507b..c14815d 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 }
 
-phys_addr_t paddr_vmcoreinfo_note(void)
-{
-	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
-}
-
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 49a6bd4..3d0b14a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3ae756c..3d1d808 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
 	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
 }
 
-static void __init setup_vmcoreinfo(void)
-{
-	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-}
-
 #ifdef CONFIG_CRASH_DUMP
 
 /*
@@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	setup_resources();
-	setup_vmcoreinfo();
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_detect_mhz_feature();
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 22217ec..44404e2 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
 	bufp += sizeof(Elf64_Phdr);
 	phdr->p_type = PT_NOTE;
 	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
 	(ehdr->e_phnum)++;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 9d9ae66..35543fa 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2723,8 +2723,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
 	if (xen_pv_domain())
-		return virt_to_machine(&vmcoreinfo_note).maddr;
+		return virt_to_machine(vmcoreinfo_note).maddr;
 	else
-		return __pa_symbol(&vmcoreinfo_note);
+		return __pa(vmcoreinfo_note);
 }
 #endif /* CONFIG_KEXEC_CORE */
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index eb71a70..ba283a2 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -53,7 +53,7 @@
 #define VMCOREINFO_PHYS_BASE(value) \
 	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
 
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+extern u32 *vmcoreinfo_note;
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index fcbd568..0321f04 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,10 +14,10 @@
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+static unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
+u32 *vmcoreinfo_note;
 
 /*
  * parsing the "crashkernel" commandline
@@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
 
 void crash_save_vmcoreinfo(void)
 {
+	if (!vmcoreinfo_note)
+		return;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
@@ -356,11 +359,27 @@ void __weak arch_crash_save_vmcoreinfo(void)
 
 phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
-	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+	return __pa(vmcoreinfo_note);
 }
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
+	/* One page should be enough for VMCOREINFO_BYTES under all archs */
+	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+	if (!vmcoreinfo_data) {
+		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+		return -ENOMEM;
+	}
+
+	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+						GFP_KERNEL | __GFP_ZERO);
+	if (!vmcoreinfo_note) {
+		free_page((unsigned long)vmcoreinfo_data);
+		vmcoreinfo_data = NULL;
+		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+		return -ENOMEM;
+	}
+
 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 23cd706..c40a4e5 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
 {
 	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
 	return sprintf(buf, "%pa %x\n", &vmcore_base,
-		       (unsigned int)sizeof(vmcoreinfo_note));
+			(unsigned int)VMCOREINFO_NOTE_SIZE);
 }
 KERNEL_ATTR_RO(vmcoreinfo);
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr
  2017-04-20 11:39 [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
@ 2017-04-20 11:39 ` Xunlei Pang
  2017-04-21  0:55   ` Xunlei Pang
  2017-04-26  7:11   ` Dave Young
  2017-04-20 11:39 ` [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory Xunlei Pang
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 18+ messages in thread
From: Xunlei Pang @ 2017-04-20 11:39 UTC (permalink / raw)
  To: linux-kernel, kexec
  Cc: akpm, Eric Biederman, Dave Young, Baoquan He, Petr Tesarik,
	Hari Bathini, Xunlei Pang, Mahesh Salgaonkar

vmcoreinfo_max_size stands for the vmcoreinfo_data, the
correct one we should use is vmcoreinfo_note whose total
size is VMCOREINFO_NOTE_SIZE.

Like explained in commit 77019967f06b ("kdump: fix exported
size of vmcoreinfo note"), it should not affect the actual
function, but we better fix it, also this change should be
safe and backward compatible.

After this, we can get rid of variable vmcoreinfo_max_size,
let's use the corresponding macros directly, fewer variables
means more safety for vmcoreinfo operation.

Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
---
v3->v4:
-Rebased on the latest linux-next

 arch/powerpc/kernel/fadump.c | 3 +--
 include/linux/crash_core.h   | 1 -
 kernel/crash_core.c          | 3 +--
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 466569e..7bd6cd0 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -893,8 +893,7 @@ static int fadump_create_elfcore_headers(char *bufp)
 
 	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
 	phdr->p_offset	= phdr->p_paddr;
-	phdr->p_memsz	= vmcoreinfo_max_size;
-	phdr->p_filesz	= vmcoreinfo_max_size;
+	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
 
 	/* Increment number of program headers. */
 	(elf->e_phnum)++;
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index ba283a2..7d6bc7b 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -55,7 +55,6 @@
 
 extern u32 *vmcoreinfo_note;
 extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 0321f04..43cdb00 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -16,7 +16,6 @@
 /* vmcoreinfo stuff */
 static unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
 u32 *vmcoreinfo_note;
 
 /*
@@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 	r = vscnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 
-	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+	r = min(r, VMCOREINFO_BYTES - vmcoreinfo_size);
 
 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory
  2017-04-20 11:39 [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
  2017-04-20 11:39 ` [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
@ 2017-04-20 11:39 ` Xunlei Pang
  2017-04-26  7:09   ` Dave Young
  2017-04-20 14:24 ` [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Juergen Gross
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Xunlei Pang @ 2017-04-20 11:39 UTC (permalink / raw)
  To: linux-kernel, kexec
  Cc: akpm, Eric Biederman, Dave Young, Baoquan He, Petr Tesarik,
	Hari Bathini, Xunlei Pang, Michael Holzheu

Currently vmcoreinfo data is updated at boot time subsys_initcall(),
it has the risk of being modified by some wrong code during system
is running.

As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
when using "crash", "makedumpfile", etc utility to parse this vmcore,
we probably will get "Segmentation fault" or other unexpected errors.

E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
system; 3) trigger kdump, then we obviously will fail to recognize the
crash context correctly due to the corrupted vmcoreinfo.

Now except for vmcoreinfo, all the crash data is well protected(including
the cpu note which is fully updated in the crash path, thus its correctness
is guaranteed). Given that vmcoreinfo data is a large chunk prepared for
kdump, we better protect it as well.

To solve this, we relocate and copy vmcoreinfo_data to the crash memory
when kdump is loading via kexec syscalls. Because the whole crash memory
will be protected by existing arch_kexec_protect_crashkres() mechanism,
we naturally protect vmcoreinfo_data from write(even read) access under
kernel direct mapping after kdump is loaded.

Since kdump is usually loaded at the very early stage after boot, we can
trust the correctness of the vmcoreinfo data copied.

On the other hand, we still need to operate the vmcoreinfo safe copy when
crash happens to generate vmcoreinfo_note again, we rely on vmap() to map
out a new kernel virtual address and update to use this new one instead in
the following crash_save_vmcoreinfo().

BTW, we do not touch vmcoreinfo_note, because it will be fully updated
using the protected vmcoreinfo_data after crash which is surely correct
just like the cpu crash note.

Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
---
v3->v4:
-Rebased on the latest linux-next
-Copy vmcoreinfo after machine_kexec_prepare()

 include/linux/crash_core.h |  2 +-
 include/linux/kexec.h      |  2 ++
 kernel/crash_core.c        | 17 ++++++++++++++++-
 kernel/kexec.c             |  8 ++++++++
 kernel/kexec_core.c        | 39 +++++++++++++++++++++++++++++++++++++++
 kernel/kexec_file.c        |  8 ++++++++
 6 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 7d6bc7b..5469adb 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -23,6 +23,7 @@
 
 typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
 
+void crash_update_vmcoreinfo_safecopy(void *ptr);
 void crash_save_vmcoreinfo(void);
 void arch_crash_save_vmcoreinfo(void);
 __printf(1, 2)
@@ -54,7 +55,6 @@
 	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
 
 extern u32 *vmcoreinfo_note;
-extern size_t vmcoreinfo_size;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c9481eb..3ea8275 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -181,6 +181,7 @@ struct kimage {
 	unsigned long start;
 	struct page *control_code_page;
 	struct page *swap_page;
+	void *vmcoreinfo_data_copy; /* locates in the crash memory */
 
 	unsigned long nr_segments;
 	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -250,6 +251,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
 int kexec_should_crash(struct task_struct *);
 int kexec_crash_loaded(void);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 43cdb00..a29e9ad 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -15,9 +15,12 @@
 
 /* vmcoreinfo stuff */
 static unsigned char *vmcoreinfo_data;
-size_t vmcoreinfo_size;
+static size_t vmcoreinfo_size;
 u32 *vmcoreinfo_note;
 
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
+
 /*
  * parsing the "crashkernel" commandline
  *
@@ -323,11 +326,23 @@ static void update_vmcoreinfo_note(void)
 	final_note(buf);
 }
 
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+	if (ptr)
+		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+	vmcoreinfo_data_safecopy = ptr;
+}
+
 void crash_save_vmcoreinfo(void)
 {
 	if (!vmcoreinfo_note)
 		return;
 
+	/* Use the safe copy to generate vmcoreinfo note if have */
+	if (vmcoreinfo_data_safecopy)
+		vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 980936a..e62ec4d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architecture(like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	for (i = 0; i < nr_segments; i++) {
 		ret = kimage_load_segment(image, &image->segment[i]);
 		if (ret)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index ae1a3ba..bac1b4f 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -481,6 +481,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
 	return pages;
 }
 
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+	struct page *vmcoreinfo_page;
+	void *safecopy;
+
+	if (image->type != KEXEC_TYPE_CRASH)
+		return 0;
+
+	/*
+	 * For kdump, allocate one vmcoreinfo safe copy from the
+	 * crash memory. as we have arch_kexec_protect_crashkres()
+	 * after kexec syscall, we naturally protect it from write
+	 * (even read) access under kernel direct mapping. But on
+	 * the other hand, we still need to operate it when crash
+	 * happens to generate vmcoreinfo note, hereby we rely on
+	 * vmap for this purpose.
+	 */
+	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+	if (!vmcoreinfo_page) {
+		pr_warn("Could not allocate vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+	if (!safecopy) {
+		pr_warn("Could not vmap vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+
+	image->vmcoreinfo_data_copy = safecopy;
+	crash_update_vmcoreinfo_safecopy(safecopy);
+
+	return 0;
+}
+
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
 	if (*image->entry != 0)
@@ -598,6 +632,11 @@ void kimage_free(struct kimage *image)
 	if (image->file_mode)
 		kimage_file_post_load_cleanup(image);
 
+	if (image->vmcoreinfo_data_copy) {
+		crash_update_vmcoreinfo_safecopy(NULL);
+		vunmap(image->vmcoreinfo_data_copy);
+	}
+
 	kfree(image);
 }
 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index b118735..f78f719 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -304,6 +304,14 @@ void kimage_file_post_load_cleanup(struct kimage *image)
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architecture(like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	ret = kexec_calculate_store_digests(image);
 	if (ret)
 		goto out;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-20 11:39 [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
  2017-04-20 11:39 ` [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
  2017-04-20 11:39 ` [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory Xunlei Pang
@ 2017-04-20 14:24 ` Juergen Gross
  2017-04-24 15:18 ` Michael Holzheu
  2017-04-26  7:19 ` Dave Young
  4 siblings, 0 replies; 18+ messages in thread
From: Juergen Gross @ 2017-04-20 14:24 UTC (permalink / raw)
  To: Xunlei Pang, linux-kernel, kexec
  Cc: akpm, Eric Biederman, Dave Young, Baoquan He, Petr Tesarik,
	Hari Bathini, Michael Holzheu

On 20/04/17 13:39, Xunlei Pang wrote:
> As Eric said,
> "what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section.  And modify the code to regenerate
> and keep this information in something like the control page.
> 
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures.  I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section."
> 
> This patch allocates extra pages for these vmcoreinfo_XXX variables,
> one advantage is that it enhances some safety of vmcoreinfo, because
> vmcoreinfo now is kept far away from other kernel data structures.
> 
> Suggested-by: Eric Biederman <ebiederm@xmission.com>
> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
> Cc: Juergen Gross <jgross@suse.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>

Xen parts: Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr
  2017-04-20 11:39 ` [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
@ 2017-04-21  0:55   ` Xunlei Pang
  2017-04-26  7:11   ` Dave Young
  1 sibling, 0 replies; 18+ messages in thread
From: Xunlei Pang @ 2017-04-21  0:55 UTC (permalink / raw)
  To: Xunlei Pang, linux-kernel, kexec
  Cc: Baoquan He, Petr Tesarik, Eric Biederman, Hari Bathini, akpm,
	Dave Young, Michael Holzheu

Cc "Michael Holzheu"

On 04/20/2017 at 07:39 PM, Xunlei Pang wrote:
> vmcoreinfo_max_size stands for the vmcoreinfo_data, the
> correct one we should use is vmcoreinfo_note whose total
> size is VMCOREINFO_NOTE_SIZE.
>
> Like explained in commit 77019967f06b ("kdump: fix exported
> size of vmcoreinfo note"), it should not affect the actual
> function, but we better fix it, also this change should be
> safe and backward compatible.
>
> After this, we can get rid of variable vmcoreinfo_max_size,
> let's use the corresponding macros directly, fewer variables
> means more safety for vmcoreinfo operation.
>
> Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
> ---
> v3->v4:
> -Rebased on the latest linux-next
>
>  arch/powerpc/kernel/fadump.c | 3 +--
>  include/linux/crash_core.h   | 1 -
>  kernel/crash_core.c          | 3 +--
>  3 files changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index 466569e..7bd6cd0 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -893,8 +893,7 @@ static int fadump_create_elfcore_headers(char *bufp)
>  
>  	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
>  	phdr->p_offset	= phdr->p_paddr;
> -	phdr->p_memsz	= vmcoreinfo_max_size;
> -	phdr->p_filesz	= vmcoreinfo_max_size;
> +	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
>  
>  	/* Increment number of program headers. */
>  	(elf->e_phnum)++;
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index ba283a2..7d6bc7b 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -55,7 +55,6 @@
>  
>  extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
> -extern size_t vmcoreinfo_max_size;
>  
>  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
>  			  void *data, size_t data_len);
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 0321f04..43cdb00 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -16,7 +16,6 @@
>  /* vmcoreinfo stuff */
>  static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
>  u32 *vmcoreinfo_note;
>  
>  /*
> @@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>  	r = vscnprintf(buf, sizeof(buf), fmt, args);
>  	va_end(args);
>  
> -	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
> +	r = min(r, VMCOREINFO_BYTES - vmcoreinfo_size);
>  
>  	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
>  

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-20 11:39 [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
                   ` (2 preceding siblings ...)
  2017-04-20 14:24 ` [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Juergen Gross
@ 2017-04-24 15:18 ` Michael Holzheu
  2017-04-26  7:19 ` Dave Young
  4 siblings, 0 replies; 18+ messages in thread
From: Michael Holzheu @ 2017-04-24 15:18 UTC (permalink / raw)
  To: Xunlei Pang
  Cc: linux-kernel, kexec, akpm, Eric Biederman, Dave Young,
	Baoquan He, Petr Tesarik, Hari Bathini, Juergen Gross

Am Thu, 20 Apr 2017 19:39:32 +0800
schrieb Xunlei Pang <xlpang@redhat.com>:

> As Eric said,
> "what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section.  And modify the code to regenerate
> and keep this information in something like the control page.
> 
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures.  I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section."
> 
> This patch allocates extra pages for these vmcoreinfo_XXX variables,
> one advantage is that it enhances some safety of vmcoreinfo, because
> vmcoreinfo now is kept far away from other kernel data structures.
> 
> Suggested-by: Eric Biederman <ebiederm@xmission.com>
> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
> Cc: Juergen Gross <jgross@suse.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>

Now s390 seems to work with the patches. I tested kdump and
zfcpdump.

Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>


> ---
> v3->v4:
> -Rebased on the latest linux-next
> -Handle S390 vmcoreinfo_note properly
> -Handle the newly-added xen/mmu_pv.c
> 
>  arch/ia64/kernel/machine_kexec.c |  5 -----
>  arch/s390/kernel/machine_kexec.c |  1 +
>  arch/s390/kernel/setup.c         |  6 ------
>  arch/x86/kernel/crash.c          |  2 +-
>  arch/x86/xen/mmu_pv.c            |  4 ++--
>  include/linux/crash_core.h       |  2 +-
>  kernel/crash_core.c              | 27 +++++++++++++++++++++++----
>  kernel/ksysfs.c                  |  2 +-
>  8 files changed, 29 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
> index 599507b..c14815d 100644
> --- a/arch/ia64/kernel/machine_kexec.c
> +++ b/arch/ia64/kernel/machine_kexec.c
> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>  #endif
>  }
> 
> -phys_addr_t paddr_vmcoreinfo_note(void)
> -{
> -	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
> -}
> -
> diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
> index 49a6bd4..3d0b14a 100644
> --- a/arch/s390/kernel/machine_kexec.c
> +++ b/arch/s390/kernel/machine_kexec.c
> @@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
>  	VMCOREINFO_SYMBOL(lowcore_ptr);
>  	VMCOREINFO_SYMBOL(high_memory);
>  	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
> +	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
>  }
> 
>  void machine_shutdown(void)
> diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
> index 3ae756c..3d1d808 100644
> --- a/arch/s390/kernel/setup.c
> +++ b/arch/s390/kernel/setup.c
> @@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
>  	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
>  }
> 
> -static void __init setup_vmcoreinfo(void)
> -{
> -	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
> -}
> -
>  #ifdef CONFIG_CRASH_DUMP
> 
>  /*
> @@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
>  #endif
> 
>  	setup_resources();
> -	setup_vmcoreinfo();
>  	setup_lowcore();
>  	smp_fill_possible_mask();
>  	cpu_detect_mhz_feature();
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 22217ec..44404e2 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
>  	bufp += sizeof(Elf64_Phdr);
>  	phdr->p_type = PT_NOTE;
>  	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
> -	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
> +	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>  	(ehdr->e_phnum)++;
> 
>  #ifdef CONFIG_X86_64
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
> index 9d9ae66..35543fa 100644
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -2723,8 +2723,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
>  phys_addr_t paddr_vmcoreinfo_note(void)
>  {
>  	if (xen_pv_domain())
> -		return virt_to_machine(&vmcoreinfo_note).maddr;
> +		return virt_to_machine(vmcoreinfo_note).maddr;
>  	else
> -		return __pa_symbol(&vmcoreinfo_note);
> +		return __pa(vmcoreinfo_note);
>  }
>  #endif /* CONFIG_KEXEC_CORE */
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index eb71a70..ba283a2 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -53,7 +53,7 @@
>  #define VMCOREINFO_PHYS_BASE(value) \
>  	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
> 
> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
>  extern size_t vmcoreinfo_max_size;
> 
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index fcbd568..0321f04 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -14,10 +14,10 @@
>  #include <asm/sections.h>
> 
>  /* vmcoreinfo stuff */
> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
> +u32 *vmcoreinfo_note;
> 
>  /*
>   * parsing the "crashkernel" commandline
> @@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
> 
>  void crash_save_vmcoreinfo(void)
>  {
> +	if (!vmcoreinfo_note)
> +		return;
> +
>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>  	update_vmcoreinfo_note();
>  }
> @@ -356,11 +359,27 @@ void __weak arch_crash_save_vmcoreinfo(void)
> 
>  phys_addr_t __weak paddr_vmcoreinfo_note(void)
>  {
> -	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> +	return __pa(vmcoreinfo_note);
>  }
> 
>  static int __init crash_save_vmcoreinfo_init(void)
>  {
> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
> +	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
> +	if (!vmcoreinfo_data) {
> +		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
> +						GFP_KERNEL | __GFP_ZERO);
> +	if (!vmcoreinfo_note) {
> +		free_page((unsigned long)vmcoreinfo_data);
> +		vmcoreinfo_data = NULL;
> +		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
> +		return -ENOMEM;
> +	}
> +
>  	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>  	VMCOREINFO_PAGESIZE(PAGE_SIZE);
> 
> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
> index 23cd706..c40a4e5 100644
> --- a/kernel/ksysfs.c
> +++ b/kernel/ksysfs.c
> @@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
>  {
>  	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
>  	return sprintf(buf, "%pa %x\n", &vmcore_base,
> -		       (unsigned int)sizeof(vmcoreinfo_note));
> +			(unsigned int)VMCOREINFO_NOTE_SIZE);
>  }
>  KERNEL_ATTR_RO(vmcoreinfo);
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory
  2017-04-20 11:39 ` [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory Xunlei Pang
@ 2017-04-26  7:09   ` Dave Young
  2017-04-26  9:21     ` Xunlei Pang
  0 siblings, 1 reply; 18+ messages in thread
From: Dave Young @ 2017-04-26  7:09 UTC (permalink / raw)
  To: Xunlei Pang
  Cc: linux-kernel, kexec, Baoquan He, Petr Tesarik, Eric Biederman,
	Hari Bathini, akpm, Michael Holzheu

On 04/20/17 at 07:39pm, Xunlei Pang wrote:
> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
> it has the risk of being modified by some wrong code during system
> is running.
> 
> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
> when using "crash", "makedumpfile", etc utility to parse this vmcore,
> we probably will get "Segmentation fault" or other unexpected errors.
> 
> E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
> system; 3) trigger kdump, then we obviously will fail to recognize the
> crash context correctly due to the corrupted vmcoreinfo.
> 
> Now except for vmcoreinfo, all the crash data is well protected(including
> the cpu note which is fully updated in the crash path, thus its correctness
> is guaranteed). Given that vmcoreinfo data is a large chunk prepared for
> kdump, we better protect it as well.
> 
> To solve this, we relocate and copy vmcoreinfo_data to the crash memory
> when kdump is loading via kexec syscalls. Because the whole crash memory
> will be protected by existing arch_kexec_protect_crashkres() mechanism,
> we naturally protect vmcoreinfo_data from write(even read) access under
> kernel direct mapping after kdump is loaded.
> 
> Since kdump is usually loaded at the very early stage after boot, we can
> trust the correctness of the vmcoreinfo data copied.
> 
> On the other hand, we still need to operate the vmcoreinfo safe copy when
> crash happens to generate vmcoreinfo_note again, we rely on vmap() to map
> out a new kernel virtual address and update to use this new one instead in
> the following crash_save_vmcoreinfo().
> 
> BTW, we do not touch vmcoreinfo_note, because it will be fully updated
> using the protected vmcoreinfo_data after crash which is surely correct
> just like the cpu crash note.
> 
> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
> ---
> v3->v4:
> -Rebased on the latest linux-next
> -Copy vmcoreinfo after machine_kexec_prepare()
> 
>  include/linux/crash_core.h |  2 +-
>  include/linux/kexec.h      |  2 ++
>  kernel/crash_core.c        | 17 ++++++++++++++++-
>  kernel/kexec.c             |  8 ++++++++
>  kernel/kexec_core.c        | 39 +++++++++++++++++++++++++++++++++++++++
>  kernel/kexec_file.c        |  8 ++++++++
>  6 files changed, 74 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index 7d6bc7b..5469adb 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -23,6 +23,7 @@
>  
>  typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
>  
> +void crash_update_vmcoreinfo_safecopy(void *ptr);
>  void crash_save_vmcoreinfo(void);
>  void arch_crash_save_vmcoreinfo(void);
>  __printf(1, 2)
> @@ -54,7 +55,6 @@
>  	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
>  
>  extern u32 *vmcoreinfo_note;
> -extern size_t vmcoreinfo_size;
>  
>  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
>  			  void *data, size_t data_len);
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index c9481eb..3ea8275 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -181,6 +181,7 @@ struct kimage {
>  	unsigned long start;
>  	struct page *control_code_page;
>  	struct page *swap_page;
> +	void *vmcoreinfo_data_copy; /* locates in the crash memory */
>  
>  	unsigned long nr_segments;
>  	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
> @@ -250,6 +251,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
>  int kexec_should_crash(struct task_struct *);
>  int kexec_crash_loaded(void);
>  void crash_save_cpu(struct pt_regs *regs, int cpu);
> +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
>  
>  extern struct kimage *kexec_image;
>  extern struct kimage *kexec_crash_image;
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 43cdb00..a29e9ad 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -15,9 +15,12 @@
>  
>  /* vmcoreinfo stuff */
>  static unsigned char *vmcoreinfo_data;
> -size_t vmcoreinfo_size;
> +static size_t vmcoreinfo_size;
>  u32 *vmcoreinfo_note;
>  
> +/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */

May make it clearer like:
/* Trusted vmcoreinfo copy in the kdump reserved memory */

> +static unsigned char *vmcoreinfo_data_safecopy;
> +
>  /*
>   * parsing the "crashkernel" commandline
>   *
> @@ -323,11 +326,23 @@ static void update_vmcoreinfo_note(void)
>  	final_note(buf);
>  }
>  
> +void crash_update_vmcoreinfo_safecopy(void *ptr)
> +{
> +	if (ptr)
> +		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
> +
> +	vmcoreinfo_data_safecopy = ptr;
> +}
> +
>  void crash_save_vmcoreinfo(void)
>  {
>  	if (!vmcoreinfo_note)
>  		return;
>  
> +	/* Use the safe copy to generate vmcoreinfo note if have */
> +	if (vmcoreinfo_data_safecopy)
> +		vmcoreinfo_data = vmcoreinfo_data_safecopy;
> +
>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>  	update_vmcoreinfo_note();
>  }
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index 980936a..e62ec4d 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
>  	if (ret)
>  		goto out;
>  
> +	/*
> +	 * Some architecture(like S390) may touch the crash memory before
> +	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
> +	 */
> +	ret = kimage_crash_copy_vmcoreinfo(image);
> +	if (ret)
> +		goto out;
> +
>  	for (i = 0; i < nr_segments; i++) {
>  		ret = kimage_load_segment(image, &image->segment[i]);
>  		if (ret)
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index ae1a3ba..bac1b4f 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -481,6 +481,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
>  	return pages;
>  }
>  
> +int kimage_crash_copy_vmcoreinfo(struct kimage *image)
> +{
> +	struct page *vmcoreinfo_page;
> +	void *safecopy;
> +
> +	if (image->type != KEXEC_TYPE_CRASH)
> +		return 0;
> +
> +	/*
> +	 * For kdump, allocate one vmcoreinfo safe copy from the
> +	 * crash memory. as we have arch_kexec_protect_crashkres()
> +	 * after kexec syscall, we naturally protect it from write
> +	 * (even read) access under kernel direct mapping. But on
> +	 * the other hand, we still need to operate it when crash
> +	 * happens to generate vmcoreinfo note, hereby we rely on
> +	 * vmap for this purpose.
> +	 */
> +	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
> +	if (!vmcoreinfo_page) {
> +		pr_warn("Could not allocate vmcoreinfo buffer\n");
> +		return -ENOMEM;
> +	}
> +	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
> +	if (!safecopy) {
> +		pr_warn("Could not vmap vmcoreinfo buffer\n");
> +		return -ENOMEM;
> +	}
> +
> +	image->vmcoreinfo_data_copy = safecopy;
> +	crash_update_vmcoreinfo_safecopy(safecopy);
> +
> +	return 0;
> +}
> +
>  static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
>  {
>  	if (*image->entry != 0)
> @@ -598,6 +632,11 @@ void kimage_free(struct kimage *image)
>  	if (image->file_mode)
>  		kimage_file_post_load_cleanup(image);
>  
> +	if (image->vmcoreinfo_data_copy) {
> +		crash_update_vmcoreinfo_safecopy(NULL);
> +		vunmap(image->vmcoreinfo_data_copy);
> +	}
> +

Should move above chunk before the freeing of the actual page?

>  	kfree(image);
>  }
>  
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index b118735..f78f719 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -304,6 +304,14 @@ void kimage_file_post_load_cleanup(struct kimage *image)
>  	if (ret)
>  		goto out;
>  
> +	/*
> +	 * Some architecture(like S390) may touch the crash memory before
> +	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
> +	 */
> +	ret = kimage_crash_copy_vmcoreinfo(image);
> +	if (ret)
> +		goto out;
> +
>  	ret = kexec_calculate_store_digests(image);
>  	if (ret)
>  		goto out;
> -- 
> 1.8.3.1
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

Thanks
Dave

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr
  2017-04-20 11:39 ` [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
  2017-04-21  0:55   ` Xunlei Pang
@ 2017-04-26  7:11   ` Dave Young
  2017-04-27  7:38     ` Mahesh Jagannath Salgaonkar
  1 sibling, 1 reply; 18+ messages in thread
From: Dave Young @ 2017-04-26  7:11 UTC (permalink / raw)
  To: Xunlei Pang
  Cc: linux-kernel, kexec, Baoquan He, Petr Tesarik, Eric Biederman,
	Hari Bathini, akpm, linuxppc-dev

Ccing ppc list
On 04/20/17 at 07:39pm, Xunlei Pang wrote:
> vmcoreinfo_max_size stands for the vmcoreinfo_data, the
> correct one we should use is vmcoreinfo_note whose total
> size is VMCOREINFO_NOTE_SIZE.
> 
> Like explained in commit 77019967f06b ("kdump: fix exported
> size of vmcoreinfo note"), it should not affect the actual
> function, but we better fix it, also this change should be
> safe and backward compatible.
> 
> After this, we can get rid of variable vmcoreinfo_max_size,
> let's use the corresponding macros directly, fewer variables
> means more safety for vmcoreinfo operation.
> 
> Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
> ---
> v3->v4:
> -Rebased on the latest linux-next
> 
>  arch/powerpc/kernel/fadump.c | 3 +--
>  include/linux/crash_core.h   | 1 -
>  kernel/crash_core.c          | 3 +--
>  3 files changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index 466569e..7bd6cd0 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -893,8 +893,7 @@ static int fadump_create_elfcore_headers(char *bufp)
>  
>  	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
>  	phdr->p_offset	= phdr->p_paddr;
> -	phdr->p_memsz	= vmcoreinfo_max_size;
> -	phdr->p_filesz	= vmcoreinfo_max_size;
> +	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
>  
>  	/* Increment number of program headers. */
>  	(elf->e_phnum)++;
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index ba283a2..7d6bc7b 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -55,7 +55,6 @@
>  
>  extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
> -extern size_t vmcoreinfo_max_size;
>  
>  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
>  			  void *data, size_t data_len);
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 0321f04..43cdb00 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -16,7 +16,6 @@
>  /* vmcoreinfo stuff */
>  static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
>  u32 *vmcoreinfo_note;
>  
>  /*
> @@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>  	r = vscnprintf(buf, sizeof(buf), fmt, args);
>  	va_end(args);
>  
> -	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
> +	r = min(r, VMCOREINFO_BYTES - vmcoreinfo_size);
>  
>  	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
>  
> -- 
> 1.8.3.1
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

Reviewed-by: Dave Young <dyoung@redhat.com>

Thanks
Dave

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-20 11:39 [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
                   ` (3 preceding siblings ...)
  2017-04-24 15:18 ` Michael Holzheu
@ 2017-04-26  7:19 ` Dave Young
  2017-04-26  9:51   ` Xunlei Pang
  4 siblings, 1 reply; 18+ messages in thread
From: Dave Young @ 2017-04-26  7:19 UTC (permalink / raw)
  To: Xunlei Pang
  Cc: linux-kernel, kexec, Juergen Gross, Baoquan He, Petr Tesarik,
	Eric Biederman, Hari Bathini, akpm, Michael Holzheu, linux-ia64,
	linux-s390

Add ia64i list,  and s390 list although Michael has tested it

On 04/20/17 at 07:39pm, Xunlei Pang wrote:
> As Eric said,
> "what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section.  And modify the code to regenerate
> and keep this information in something like the control page.
> 
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures.  I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section."
> 
> This patch allocates extra pages for these vmcoreinfo_XXX variables,
> one advantage is that it enhances some safety of vmcoreinfo, because
> vmcoreinfo now is kept far away from other kernel data structures.
> 
> Suggested-by: Eric Biederman <ebiederm@xmission.com>
> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
> Cc: Juergen Gross <jgross@suse.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
> ---
> v3->v4:
> -Rebased on the latest linux-next
> -Handle S390 vmcoreinfo_note properly
> -Handle the newly-added xen/mmu_pv.c
> 
>  arch/ia64/kernel/machine_kexec.c |  5 -----
>  arch/s390/kernel/machine_kexec.c |  1 +
>  arch/s390/kernel/setup.c         |  6 ------
>  arch/x86/kernel/crash.c          |  2 +-
>  arch/x86/xen/mmu_pv.c            |  4 ++--
>  include/linux/crash_core.h       |  2 +-
>  kernel/crash_core.c              | 27 +++++++++++++++++++++++----
>  kernel/ksysfs.c                  |  2 +-
>  8 files changed, 29 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
> index 599507b..c14815d 100644
> --- a/arch/ia64/kernel/machine_kexec.c
> +++ b/arch/ia64/kernel/machine_kexec.c
> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>  #endif
>  }
>  
> -phys_addr_t paddr_vmcoreinfo_note(void)
> -{
> -	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
> -}
> -
> diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
> index 49a6bd4..3d0b14a 100644
> --- a/arch/s390/kernel/machine_kexec.c
> +++ b/arch/s390/kernel/machine_kexec.c
> @@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
>  	VMCOREINFO_SYMBOL(lowcore_ptr);
>  	VMCOREINFO_SYMBOL(high_memory);
>  	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
> +	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
>  }
>  
>  void machine_shutdown(void)
> diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
> index 3ae756c..3d1d808 100644
> --- a/arch/s390/kernel/setup.c
> +++ b/arch/s390/kernel/setup.c
> @@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
>  	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
>  }
>  
> -static void __init setup_vmcoreinfo(void)
> -{
> -	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
> -}
> -
>  #ifdef CONFIG_CRASH_DUMP
>  
>  /*
> @@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
>  #endif
>  
>  	setup_resources();
> -	setup_vmcoreinfo();
>  	setup_lowcore();
>  	smp_fill_possible_mask();
>  	cpu_detect_mhz_feature();
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 22217ec..44404e2 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
>  	bufp += sizeof(Elf64_Phdr);
>  	phdr->p_type = PT_NOTE;
>  	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
> -	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
> +	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>  	(ehdr->e_phnum)++;
>  
>  #ifdef CONFIG_X86_64
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
> index 9d9ae66..35543fa 100644
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -2723,8 +2723,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
>  phys_addr_t paddr_vmcoreinfo_note(void)
>  {
>  	if (xen_pv_domain())
> -		return virt_to_machine(&vmcoreinfo_note).maddr;
> +		return virt_to_machine(vmcoreinfo_note).maddr;
>  	else
> -		return __pa_symbol(&vmcoreinfo_note);
> +		return __pa(vmcoreinfo_note);
>  }
>  #endif /* CONFIG_KEXEC_CORE */
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index eb71a70..ba283a2 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -53,7 +53,7 @@
>  #define VMCOREINFO_PHYS_BASE(value) \
>  	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
>  
> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
>  extern size_t vmcoreinfo_max_size;
>  
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index fcbd568..0321f04 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -14,10 +14,10 @@
>  #include <asm/sections.h>
>  
>  /* vmcoreinfo stuff */
> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
> +u32 *vmcoreinfo_note;
>  
>  /*
>   * parsing the "crashkernel" commandline
> @@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
>  
>  void crash_save_vmcoreinfo(void)
>  {
> +	if (!vmcoreinfo_note)
> +		return;
> +
>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>  	update_vmcoreinfo_note();
>  }
> @@ -356,11 +359,27 @@ void __weak arch_crash_save_vmcoreinfo(void)
>  
>  phys_addr_t __weak paddr_vmcoreinfo_note(void)
>  {
> -	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> +	return __pa(vmcoreinfo_note);
>  }
>  
>  static int __init crash_save_vmcoreinfo_init(void)
>  {
> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */

Can we add a comment in the VMCOREINFO_BYTES header file about the one
page assumption?

Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096
 
> +	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
> +	if (!vmcoreinfo_data) {
> +		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
> +						GFP_KERNEL | __GFP_ZERO);
> +	if (!vmcoreinfo_note) {
> +		free_page((unsigned long)vmcoreinfo_data);
> +		vmcoreinfo_data = NULL;
> +		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
> +		return -ENOMEM;
> +	}
> +
>  	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>  	VMCOREINFO_PAGESIZE(PAGE_SIZE);
>  
> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
> index 23cd706..c40a4e5 100644
> --- a/kernel/ksysfs.c
> +++ b/kernel/ksysfs.c
> @@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
>  {
>  	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
>  	return sprintf(buf, "%pa %x\n", &vmcore_base,
> -		       (unsigned int)sizeof(vmcoreinfo_note));
> +			(unsigned int)VMCOREINFO_NOTE_SIZE);
>  }
>  KERNEL_ATTR_RO(vmcoreinfo);
>  
> -- 
> 1.8.3.1
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

Thanks
Dave

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory
  2017-04-26  7:09   ` Dave Young
@ 2017-04-26  9:21     ` Xunlei Pang
  2017-04-27  2:56       ` Dave Young
  0 siblings, 1 reply; 18+ messages in thread
From: Xunlei Pang @ 2017-04-26  9:21 UTC (permalink / raw)
  To: Dave Young, Xunlei Pang
  Cc: Baoquan He, kexec, Petr Tesarik, linux-kernel, Eric Biederman,
	Hari Bathini, akpm, Michael Holzheu

On 04/26/2017 at 03:09 PM, Dave Young wrote:
> On 04/20/17 at 07:39pm, Xunlei Pang wrote:
>> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
>> it has the risk of being modified by some wrong code during system
>> is running.
>>
>> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
>> when using "crash", "makedumpfile", etc utility to parse this vmcore,
>> we probably will get "Segmentation fault" or other unexpected errors.
>>
>> E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
>> system; 3) trigger kdump, then we obviously will fail to recognize the
>> crash context correctly due to the corrupted vmcoreinfo.
>>
>> Now except for vmcoreinfo, all the crash data is well protected(including
>> the cpu note which is fully updated in the crash path, thus its correctness
>> is guaranteed). Given that vmcoreinfo data is a large chunk prepared for
>> kdump, we better protect it as well.
>>
>> To solve this, we relocate and copy vmcoreinfo_data to the crash memory
>> when kdump is loading via kexec syscalls. Because the whole crash memory
>> will be protected by existing arch_kexec_protect_crashkres() mechanism,
>> we naturally protect vmcoreinfo_data from write(even read) access under
>> kernel direct mapping after kdump is loaded.
>>
>> Since kdump is usually loaded at the very early stage after boot, we can
>> trust the correctness of the vmcoreinfo data copied.
>>
>> On the other hand, we still need to operate the vmcoreinfo safe copy when
>> crash happens to generate vmcoreinfo_note again, we rely on vmap() to map
>> out a new kernel virtual address and update to use this new one instead in
>> the following crash_save_vmcoreinfo().
>>
>> BTW, we do not touch vmcoreinfo_note, because it will be fully updated
>> using the protected vmcoreinfo_data after crash which is surely correct
>> just like the cpu crash note.
>>
>> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
>> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
>> ---
>> v3->v4:
>> -Rebased on the latest linux-next
>> -Copy vmcoreinfo after machine_kexec_prepare()
>>
>>  include/linux/crash_core.h |  2 +-
>>  include/linux/kexec.h      |  2 ++
>>  kernel/crash_core.c        | 17 ++++++++++++++++-
>>  kernel/kexec.c             |  8 ++++++++
>>  kernel/kexec_core.c        | 39 +++++++++++++++++++++++++++++++++++++++
>>  kernel/kexec_file.c        |  8 ++++++++
>>  6 files changed, 74 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>> index 7d6bc7b..5469adb 100644
>> --- a/include/linux/crash_core.h
>> +++ b/include/linux/crash_core.h
>> @@ -23,6 +23,7 @@
>>  
>>  typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
>>  
>> +void crash_update_vmcoreinfo_safecopy(void *ptr);
>>  void crash_save_vmcoreinfo(void);
>>  void arch_crash_save_vmcoreinfo(void);
>>  __printf(1, 2)
>> @@ -54,7 +55,6 @@
>>  	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
>>  
>>  extern u32 *vmcoreinfo_note;
>> -extern size_t vmcoreinfo_size;
>>  
>>  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
>>  			  void *data, size_t data_len);
>> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
>> index c9481eb..3ea8275 100644
>> --- a/include/linux/kexec.h
>> +++ b/include/linux/kexec.h
>> @@ -181,6 +181,7 @@ struct kimage {
>>  	unsigned long start;
>>  	struct page *control_code_page;
>>  	struct page *swap_page;
>> +	void *vmcoreinfo_data_copy; /* locates in the crash memory */
>>  
>>  	unsigned long nr_segments;
>>  	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
>> @@ -250,6 +251,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
>>  int kexec_should_crash(struct task_struct *);
>>  int kexec_crash_loaded(void);
>>  void crash_save_cpu(struct pt_regs *regs, int cpu);
>> +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
>>  
>>  extern struct kimage *kexec_image;
>>  extern struct kimage *kexec_crash_image;
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index 43cdb00..a29e9ad 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -15,9 +15,12 @@
>>  
>>  /* vmcoreinfo stuff */
>>  static unsigned char *vmcoreinfo_data;
>> -size_t vmcoreinfo_size;
>> +static size_t vmcoreinfo_size;
>>  u32 *vmcoreinfo_note;
>>  
>> +/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
> May make it clearer like:
> /* Trusted vmcoreinfo copy in the kdump reserved memory */

My thought is that it is in crash_core.c now which should be independent of kexec/kdump,
so I used "e.g. ..." just like one use case.

>
>> +static unsigned char *vmcoreinfo_data_safecopy;
>> +
>>  /*
>>   * parsing the "crashkernel" commandline
>>   *
>> @@ -323,11 +326,23 @@ static void update_vmcoreinfo_note(void)
>>  	final_note(buf);
>>  }
>>  
>> +void crash_update_vmcoreinfo_safecopy(void *ptr)
>> +{
>> +	if (ptr)
>> +		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
>> +
>> +	vmcoreinfo_data_safecopy = ptr;
>> +}
>> +
>>  void crash_save_vmcoreinfo(void)
>>  {
>>  	if (!vmcoreinfo_note)
>>  		return;
>>  
>> +	/* Use the safe copy to generate vmcoreinfo note if have */
>> +	if (vmcoreinfo_data_safecopy)
>> +		vmcoreinfo_data = vmcoreinfo_data_safecopy;
>> +
>>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>  	update_vmcoreinfo_note();
>>  }
>> diff --git a/kernel/kexec.c b/kernel/kexec.c
>> index 980936a..e62ec4d 100644
>> --- a/kernel/kexec.c
>> +++ b/kernel/kexec.c
>> @@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
>>  	if (ret)
>>  		goto out;
>>  
>> +	/*
>> +	 * Some architecture(like S390) may touch the crash memory before
>> +	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
>> +	 */
>> +	ret = kimage_crash_copy_vmcoreinfo(image);
>> +	if (ret)
>> +		goto out;
>> +
>>  	for (i = 0; i < nr_segments; i++) {
>>  		ret = kimage_load_segment(image, &image->segment[i]);
>>  		if (ret)
>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>> index ae1a3ba..bac1b4f 100644
>> --- a/kernel/kexec_core.c
>> +++ b/kernel/kexec_core.c
>> @@ -481,6 +481,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
>>  	return pages;
>>  }
>>  
>> +int kimage_crash_copy_vmcoreinfo(struct kimage *image)
>> +{
>> +	struct page *vmcoreinfo_page;
>> +	void *safecopy;
>> +
>> +	if (image->type != KEXEC_TYPE_CRASH)
>> +		return 0;
>> +
>> +	/*
>> +	 * For kdump, allocate one vmcoreinfo safe copy from the
>> +	 * crash memory. as we have arch_kexec_protect_crashkres()
>> +	 * after kexec syscall, we naturally protect it from write
>> +	 * (even read) access under kernel direct mapping. But on
>> +	 * the other hand, we still need to operate it when crash
>> +	 * happens to generate vmcoreinfo note, hereby we rely on
>> +	 * vmap for this purpose.
>> +	 */
>> +	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
>> +	if (!vmcoreinfo_page) {
>> +		pr_warn("Could not allocate vmcoreinfo buffer\n");
>> +		return -ENOMEM;
>> +	}
>> +	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
>> +	if (!safecopy) {
>> +		pr_warn("Could not vmap vmcoreinfo buffer\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	image->vmcoreinfo_data_copy = safecopy;
>> +	crash_update_vmcoreinfo_safecopy(safecopy);
>> +
>> +	return 0;
>> +}
>> +
>>  static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
>>  {
>>  	if (*image->entry != 0)
>> @@ -598,6 +632,11 @@ void kimage_free(struct kimage *image)
>>  	if (image->file_mode)
>>  		kimage_file_post_load_cleanup(image);
>>  
>> +	if (image->vmcoreinfo_data_copy) {
>> +		crash_update_vmcoreinfo_safecopy(NULL);
>> +		vunmap(image->vmcoreinfo_data_copy);
>> +	}
>> +
> Should move above chunk before the freeing of the actual page?

It should be fine, because it is allocated from the reserved memory, it doesn't
need to be freed. Anyway I can move it above to avoid confusion. Thanks!

Regards,
Xunlei

>
>>  	kfree(image);
>>  }
>>  
>> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
>> index b118735..f78f719 100644
>> --- a/kernel/kexec_file.c
>> +++ b/kernel/kexec_file.c
>> @@ -304,6 +304,14 @@ void kimage_file_post_load_cleanup(struct kimage *image)
>>  	if (ret)
>>  		goto out;
>>  
>> +	/*
>> +	 * Some architecture(like S390) may touch the crash memory before
>> +	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
>> +	 */
>> +	ret = kimage_crash_copy_vmcoreinfo(image);
>> +	if (ret)
>> +		goto out;
>> +
>>  	ret = kexec_calculate_store_digests(image);
>>  	if (ret)
>>  		goto out;
>> -- 
>> 1.8.3.1
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
> Thanks
> Dave
>
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-26  7:19 ` Dave Young
@ 2017-04-26  9:51   ` Xunlei Pang
  2017-04-26 10:18     ` Xunlei Pang
  0 siblings, 1 reply; 18+ messages in thread
From: Xunlei Pang @ 2017-04-26  9:51 UTC (permalink / raw)
  To: Dave Young, Xunlei Pang
  Cc: Juergen Gross, linux-s390, linux-ia64, Baoquan He, kexec,
	Petr Tesarik, linux-kernel, Eric Biederman, Hari Bathini, akpm,
	Michael Holzheu

On 04/26/2017 at 03:19 PM, Dave Young wrote:
> Add ia64i list,  and s390 list although Michael has tested it
>
> On 04/20/17 at 07:39pm, Xunlei Pang wrote:
>> As Eric said,
>> "what we need to do is move the variable vmcoreinfo_note out
>> of the kernel's .bss section.  And modify the code to regenerate
>> and keep this information in something like the control page.
>>
>> Definitely something like this needs a page all to itself, and ideally
>> far away from any other kernel data structures.  I clearly was not
>> watching closely the data someone decided to keep this silly thing
>> in the kernel's .bss section."
>>
>> This patch allocates extra pages for these vmcoreinfo_XXX variables,
>> one advantage is that it enhances some safety of vmcoreinfo, because
>> vmcoreinfo now is kept far away from other kernel data structures.
>>
>> Suggested-by: Eric Biederman <ebiederm@xmission.com>
>> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
>> Cc: Juergen Gross <jgross@suse.com>
>> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
>> ---
>> v3->v4:
>> -Rebased on the latest linux-next
>> -Handle S390 vmcoreinfo_note properly
>> -Handle the newly-added xen/mmu_pv.c
>>
>>  arch/ia64/kernel/machine_kexec.c |  5 -----
>>  arch/s390/kernel/machine_kexec.c |  1 +
>>  arch/s390/kernel/setup.c         |  6 ------
>>  arch/x86/kernel/crash.c          |  2 +-
>>  arch/x86/xen/mmu_pv.c            |  4 ++--
>>  include/linux/crash_core.h       |  2 +-
>>  kernel/crash_core.c              | 27 +++++++++++++++++++++++----
>>  kernel/ksysfs.c                  |  2 +-
>>  8 files changed, 29 insertions(+), 20 deletions(-)
>>
>> diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
>> index 599507b..c14815d 100644
>> --- a/arch/ia64/kernel/machine_kexec.c
>> +++ b/arch/ia64/kernel/machine_kexec.c
>> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>>  #endif
>>  }
>>  
>> -phys_addr_t paddr_vmcoreinfo_note(void)
>> -{
>> -	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
>> -}
>> -
>> diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
>> index 49a6bd4..3d0b14a 100644
>> --- a/arch/s390/kernel/machine_kexec.c
>> +++ b/arch/s390/kernel/machine_kexec.c
>> @@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
>>  	VMCOREINFO_SYMBOL(lowcore_ptr);
>>  	VMCOREINFO_SYMBOL(high_memory);
>>  	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
>> +	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
>>  }
>>  
>>  void machine_shutdown(void)
>> diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
>> index 3ae756c..3d1d808 100644
>> --- a/arch/s390/kernel/setup.c
>> +++ b/arch/s390/kernel/setup.c
>> @@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
>>  	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
>>  }
>>  
>> -static void __init setup_vmcoreinfo(void)
>> -{
>> -	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
>> -}
>> -
>>  #ifdef CONFIG_CRASH_DUMP
>>  
>>  /*
>> @@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
>>  #endif
>>  
>>  	setup_resources();
>> -	setup_vmcoreinfo();
>>  	setup_lowcore();
>>  	smp_fill_possible_mask();
>>  	cpu_detect_mhz_feature();
>> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
>> index 22217ec..44404e2 100644
>> --- a/arch/x86/kernel/crash.c
>> +++ b/arch/x86/kernel/crash.c
>> @@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
>>  	bufp += sizeof(Elf64_Phdr);
>>  	phdr->p_type = PT_NOTE;
>>  	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
>> -	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
>> +	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>>  	(ehdr->e_phnum)++;
>>  
>>  #ifdef CONFIG_X86_64
>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
>> index 9d9ae66..35543fa 100644
>> --- a/arch/x86/xen/mmu_pv.c
>> +++ b/arch/x86/xen/mmu_pv.c
>> @@ -2723,8 +2723,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
>>  phys_addr_t paddr_vmcoreinfo_note(void)
>>  {
>>  	if (xen_pv_domain())
>> -		return virt_to_machine(&vmcoreinfo_note).maddr;
>> +		return virt_to_machine(vmcoreinfo_note).maddr;
>>  	else
>> -		return __pa_symbol(&vmcoreinfo_note);
>> +		return __pa(vmcoreinfo_note);
>>  }
>>  #endif /* CONFIG_KEXEC_CORE */
>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>> index eb71a70..ba283a2 100644
>> --- a/include/linux/crash_core.h
>> +++ b/include/linux/crash_core.h
>> @@ -53,7 +53,7 @@
>>  #define VMCOREINFO_PHYS_BASE(value) \
>>  	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
>>  
>> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>> +extern u32 *vmcoreinfo_note;
>>  extern size_t vmcoreinfo_size;
>>  extern size_t vmcoreinfo_max_size;
>>  
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index fcbd568..0321f04 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -14,10 +14,10 @@
>>  #include <asm/sections.h>
>>  
>>  /* vmcoreinfo stuff */
>> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
>> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>> +static unsigned char *vmcoreinfo_data;
>>  size_t vmcoreinfo_size;
>> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
>> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
>> +u32 *vmcoreinfo_note;
>>  
>>  /*
>>   * parsing the "crashkernel" commandline
>> @@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
>>  
>>  void crash_save_vmcoreinfo(void)
>>  {
>> +	if (!vmcoreinfo_note)
>> +		return;
>> +
>>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>  	update_vmcoreinfo_note();
>>  }
>> @@ -356,11 +359,27 @@ void __weak arch_crash_save_vmcoreinfo(void)
>>  
>>  phys_addr_t __weak paddr_vmcoreinfo_note(void)
>>  {
>> -	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
>> +	return __pa(vmcoreinfo_note);
>>  }
>>  
>>  static int __init crash_save_vmcoreinfo_init(void)
>>  {
>> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
> Can we add a comment in the VMCOREINFO_BYTES header file about the one
> page assumption?
>
> Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096

Yes, I considered this before, but VMCOREINFO_BYTES is also used by VMCOREINFO_NOTE_SIZE
definition which is exported to sysfs, also some platform has larger page size(64KB), so
I didn't touch this 4096 value.

I think I should use kmalloc() to allocate both of them, then move this comment to Patch3 
kimage_crash_copy_vmcoreinfo().

Regards,
Xunlei

>  
>> +	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
>> +	if (!vmcoreinfo_data) {
>> +		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
>> +						GFP_KERNEL | __GFP_ZERO);
>> +	if (!vmcoreinfo_note) {
>> +		free_page((unsigned long)vmcoreinfo_data);
>> +		vmcoreinfo_data = NULL;
>> +		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
>> +		return -ENOMEM;
>> +	}
>> +
>>  	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>>  	VMCOREINFO_PAGESIZE(PAGE_SIZE);
>>  
>> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
>> index 23cd706..c40a4e5 100644
>> --- a/kernel/ksysfs.c
>> +++ b/kernel/ksysfs.c
>> @@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
>>  {
>>  	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
>>  	return sprintf(buf, "%pa %x\n", &vmcore_base,
>> -		       (unsigned int)sizeof(vmcoreinfo_note));
>> +			(unsigned int)VMCOREINFO_NOTE_SIZE);
>>  }
>>  KERNEL_ATTR_RO(vmcoreinfo);
>>  
>> -- 
>> 1.8.3.1
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
> Thanks
> Dave
>
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-26  9:51   ` Xunlei Pang
@ 2017-04-26 10:18     ` Xunlei Pang
  2017-04-27  3:06       ` Dave Young
  0 siblings, 1 reply; 18+ messages in thread
From: Xunlei Pang @ 2017-04-26 10:18 UTC (permalink / raw)
  To: Dave Young, Xunlei Pang
  Cc: Juergen Gross, linux-s390, linux-ia64, Baoquan He, kexec,
	Petr Tesarik, linux-kernel, Eric Biederman, Hari Bathini, akpm,
	Michael Holzheu

On 04/26/2017 at 05:51 PM, Xunlei Pang wrote:
> On 04/26/2017 at 03:19 PM, Dave Young wrote:
>> Add ia64i list,  and s390 list although Michael has tested it
>>
>> On 04/20/17 at 07:39pm, Xunlei Pang wrote:
>>> As Eric said,
>>> "what we need to do is move the variable vmcoreinfo_note out
>>> of the kernel's .bss section.  And modify the code to regenerate
>>> and keep this information in something like the control page.
>>>
>>> Definitely something like this needs a page all to itself, and ideally
>>> far away from any other kernel data structures.  I clearly was not
>>> watching closely the data someone decided to keep this silly thing
>>> in the kernel's .bss section."
>>>
>>> This patch allocates extra pages for these vmcoreinfo_XXX variables,
>>> one advantage is that it enhances some safety of vmcoreinfo, because
>>> vmcoreinfo now is kept far away from other kernel data structures.
>>>
>>> Suggested-by: Eric Biederman <ebiederm@xmission.com>
>>> Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
>>> Cc: Juergen Gross <jgross@suse.com>
>>> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
>>> ---
>>> v3->v4:
>>> -Rebased on the latest linux-next
>>> -Handle S390 vmcoreinfo_note properly
>>> -Handle the newly-added xen/mmu_pv.c
>>>
>>>  arch/ia64/kernel/machine_kexec.c |  5 -----
>>>  arch/s390/kernel/machine_kexec.c |  1 +
>>>  arch/s390/kernel/setup.c         |  6 ------
>>>  arch/x86/kernel/crash.c          |  2 +-
>>>  arch/x86/xen/mmu_pv.c            |  4 ++--
>>>  include/linux/crash_core.h       |  2 +-
>>>  kernel/crash_core.c              | 27 +++++++++++++++++++++++----
>>>  kernel/ksysfs.c                  |  2 +-
>>>  8 files changed, 29 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
>>> index 599507b..c14815d 100644
>>> --- a/arch/ia64/kernel/machine_kexec.c
>>> +++ b/arch/ia64/kernel/machine_kexec.c
>>> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>>>  #endif
>>>  }
>>>  
>>> -phys_addr_t paddr_vmcoreinfo_note(void)
>>> -{
>>> -	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
>>> -}
>>> -
>>> diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
>>> index 49a6bd4..3d0b14a 100644
>>> --- a/arch/s390/kernel/machine_kexec.c
>>> +++ b/arch/s390/kernel/machine_kexec.c
>>> @@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
>>>  	VMCOREINFO_SYMBOL(lowcore_ptr);
>>>  	VMCOREINFO_SYMBOL(high_memory);
>>>  	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
>>> +	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
>>>  }
>>>  
>>>  void machine_shutdown(void)
>>> diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
>>> index 3ae756c..3d1d808 100644
>>> --- a/arch/s390/kernel/setup.c
>>> +++ b/arch/s390/kernel/setup.c
>>> @@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
>>>  	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
>>>  }
>>>  
>>> -static void __init setup_vmcoreinfo(void)
>>> -{
>>> -	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
>>> -}
>>> -
>>>  #ifdef CONFIG_CRASH_DUMP
>>>  
>>>  /*
>>> @@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
>>>  #endif
>>>  
>>>  	setup_resources();
>>> -	setup_vmcoreinfo();
>>>  	setup_lowcore();
>>>  	smp_fill_possible_mask();
>>>  	cpu_detect_mhz_feature();
>>> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
>>> index 22217ec..44404e2 100644
>>> --- a/arch/x86/kernel/crash.c
>>> +++ b/arch/x86/kernel/crash.c
>>> @@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
>>>  	bufp += sizeof(Elf64_Phdr);
>>>  	phdr->p_type = PT_NOTE;
>>>  	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
>>> -	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
>>> +	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>>>  	(ehdr->e_phnum)++;
>>>  
>>>  #ifdef CONFIG_X86_64
>>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
>>> index 9d9ae66..35543fa 100644
>>> --- a/arch/x86/xen/mmu_pv.c
>>> +++ b/arch/x86/xen/mmu_pv.c
>>> @@ -2723,8 +2723,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
>>>  phys_addr_t paddr_vmcoreinfo_note(void)
>>>  {
>>>  	if (xen_pv_domain())
>>> -		return virt_to_machine(&vmcoreinfo_note).maddr;
>>> +		return virt_to_machine(vmcoreinfo_note).maddr;
>>>  	else
>>> -		return __pa_symbol(&vmcoreinfo_note);
>>> +		return __pa(vmcoreinfo_note);
>>>  }
>>>  #endif /* CONFIG_KEXEC_CORE */
>>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>>> index eb71a70..ba283a2 100644
>>> --- a/include/linux/crash_core.h
>>> +++ b/include/linux/crash_core.h
>>> @@ -53,7 +53,7 @@
>>>  #define VMCOREINFO_PHYS_BASE(value) \
>>>  	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
>>>  
>>> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>> +extern u32 *vmcoreinfo_note;
>>>  extern size_t vmcoreinfo_size;
>>>  extern size_t vmcoreinfo_max_size;
>>>  
>>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>>> index fcbd568..0321f04 100644
>>> --- a/kernel/crash_core.c
>>> +++ b/kernel/crash_core.c
>>> @@ -14,10 +14,10 @@
>>>  #include <asm/sections.h>
>>>  
>>>  /* vmcoreinfo stuff */
>>> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
>>> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>> +static unsigned char *vmcoreinfo_data;
>>>  size_t vmcoreinfo_size;
>>> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
>>> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
>>> +u32 *vmcoreinfo_note;
>>>  
>>>  /*
>>>   * parsing the "crashkernel" commandline
>>> @@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
>>>  
>>>  void crash_save_vmcoreinfo(void)
>>>  {
>>> +	if (!vmcoreinfo_note)
>>> +		return;
>>> +
>>>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>>  	update_vmcoreinfo_note();
>>>  }
>>> @@ -356,11 +359,27 @@ void __weak arch_crash_save_vmcoreinfo(void)
>>>  
>>>  phys_addr_t __weak paddr_vmcoreinfo_note(void)
>>>  {
>>> -	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
>>> +	return __pa(vmcoreinfo_note);
>>>  }
>>>  
>>>  static int __init crash_save_vmcoreinfo_init(void)
>>>  {
>>> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
>> Can we add a comment in the VMCOREINFO_BYTES header file about the one
>> page assumption?
>>
>> Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096
> Yes, I considered this before, but VMCOREINFO_BYTES is also used by VMCOREINFO_NOTE_SIZE
> definition which is exported to sysfs, also some platform has larger page size(64KB), so
> I didn't touch this 4096 value.
>
> I think I should use kmalloc() to allocate both of them, then move this comment to Patch3 
> kimage_crash_copy_vmcoreinfo().

But on the other hand, using a separate page for them seems safer compared with
using frequently-used slab, what's your opinion?

Regards,
Xunlei

>
> Regards,
> Xunlei
>
>>  
>>> +	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
>>> +	if (!vmcoreinfo_data) {
>>> +		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
>>> +						GFP_KERNEL | __GFP_ZERO);
>>> +	if (!vmcoreinfo_note) {
>>> +		free_page((unsigned long)vmcoreinfo_data);
>>> +		vmcoreinfo_data = NULL;
>>> +		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>>  	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>>>  	VMCOREINFO_PAGESIZE(PAGE_SIZE);
>>>  
>>> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
>>> index 23cd706..c40a4e5 100644
>>> --- a/kernel/ksysfs.c
>>> +++ b/kernel/ksysfs.c
>>> @@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
>>>  {
>>>  	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
>>>  	return sprintf(buf, "%pa %x\n", &vmcore_base,
>>> -		       (unsigned int)sizeof(vmcoreinfo_note));
>>> +			(unsigned int)VMCOREINFO_NOTE_SIZE);
>>>  }
>>>  KERNEL_ATTR_RO(vmcoreinfo);
>>>  
>>> -- 
>>> 1.8.3.1
>>>
>>>
>>> _______________________________________________
>>> kexec mailing list
>>> kexec@lists.infradead.org
>>> http://lists.infradead.org/mailman/listinfo/kexec
>> Thanks
>> Dave
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory
  2017-04-26  9:21     ` Xunlei Pang
@ 2017-04-27  2:56       ` Dave Young
  0 siblings, 0 replies; 18+ messages in thread
From: Dave Young @ 2017-04-27  2:56 UTC (permalink / raw)
  To: xlpang
  Cc: Baoquan He, kexec, Petr Tesarik, linux-kernel, Eric Biederman,
	Hari Bathini, akpm, Michael Holzheu

[snip]
> >> index 43cdb00..a29e9ad 100644
> >> --- a/kernel/crash_core.c
> >> +++ b/kernel/crash_core.c
> >> @@ -15,9 +15,12 @@
> >>  
> >>  /* vmcoreinfo stuff */
> >>  static unsigned char *vmcoreinfo_data;
> >> -size_t vmcoreinfo_size;
> >> +static size_t vmcoreinfo_size;
> >>  u32 *vmcoreinfo_note;
> >>  
> >> +/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
> > May make it clearer like:
> > /* Trusted vmcoreinfo copy in the kdump reserved memory */
> 
> My thought is that it is in crash_core.c now which should be independent of kexec/kdump,
> so I used "e.g. ..." just like one use case.

Ok, then it is fine.

[snip]
> >>  static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
> >>  {
> >>  	if (*image->entry != 0)
> >> @@ -598,6 +632,11 @@ void kimage_free(struct kimage *image)
> >>  	if (image->file_mode)
> >>  		kimage_file_post_load_cleanup(image);
> >>  
> >> +	if (image->vmcoreinfo_data_copy) {
> >> +		crash_update_vmcoreinfo_safecopy(NULL);
> >> +		vunmap(image->vmcoreinfo_data_copy);
> >> +	}
> >> +
> > Should move above chunk before the freeing of the actual page?
> 
> It should be fine, because it is allocated from the reserved memory, it doesn't
> need to be freed. Anyway I can move it above to avoid confusion. Thanks!
> 

Yes, it looks better, thanks for explanation.

Thanks
Dave

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-26 10:18     ` Xunlei Pang
@ 2017-04-27  3:06       ` Dave Young
  2017-04-27  5:25         ` Xunlei Pang
  0 siblings, 1 reply; 18+ messages in thread
From: Dave Young @ 2017-04-27  3:06 UTC (permalink / raw)
  To: xlpang
  Cc: Juergen Gross, linux-s390, linux-ia64, Baoquan He, kexec,
	Petr Tesarik, linux-kernel, Eric Biederman, Hari Bathini, akpm,
	Michael Holzheu

[snip]
> >>>  
> >>>  static int __init crash_save_vmcoreinfo_init(void)
> >>>  {
> >>> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
> >> Can we add a comment in the VMCOREINFO_BYTES header file about the one
> >> page assumption?
> >>
> >> Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096
> > Yes, I considered this before, but VMCOREINFO_BYTES is also used by VMCOREINFO_NOTE_SIZE
> > definition which is exported to sysfs, also some platform has larger page size(64KB), so
> > I didn't touch this 4096 value.
> >
> > I think I should use kmalloc() to allocate both of them, then move this comment to Patch3 
> > kimage_crash_copy_vmcoreinfo().
> 
> But on the other hand, using a separate page for them seems safer compared with
> using frequently-used slab, what's your opinion?

I feel current page based way is better.

For 64k page the vmcore note size will increase it seems fine. Do you
have concern in mind?

Thanks

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-27  3:06       ` Dave Young
@ 2017-04-27  5:25         ` Xunlei Pang
  2017-04-27  5:44           ` Dave Young
  0 siblings, 1 reply; 18+ messages in thread
From: Xunlei Pang @ 2017-04-27  5:25 UTC (permalink / raw)
  To: Dave Young, xlpang
  Cc: Juergen Gross, linux-s390, linux-ia64, Baoquan He, Petr Tesarik,
	kexec, linux-kernel, Eric Biederman, Hari Bathini, akpm,
	Michael Holzheu

On 04/27/2017 at 11:06 AM, Dave Young wrote:
> [snip]
>>>>>  
>>>>>  static int __init crash_save_vmcoreinfo_init(void)
>>>>>  {
>>>>> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
>>>> Can we add a comment in the VMCOREINFO_BYTES header file about the one
>>>> page assumption?
>>>>
>>>> Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096
>>> Yes, I considered this before, but VMCOREINFO_BYTES is also used by VMCOREINFO_NOTE_SIZE
>>> definition which is exported to sysfs, also some platform has larger page size(64KB), so
>>> I didn't touch this 4096 value.
>>>
>>> I think I should use kmalloc() to allocate both of them, then move this comment to Patch3 
>>> kimage_crash_copy_vmcoreinfo().
>> But on the other hand, using a separate page for them seems safer compared with
>> using frequently-used slab, what's your opinion?
> I feel current page based way is better.
>
> For 64k page the vmcore note size will increase it seems fine. Do you
> have concern in mind?

Since tools are supposed to acquire vmcoreinfo note size from sysfs, it should be safe to do so,
except that there is some waste in memory for larger PAGE_SIZE.

Regards,
Xunlei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-27  5:25         ` Xunlei Pang
@ 2017-04-27  5:44           ` Dave Young
  2017-04-27  6:08             ` Xunlei Pang
  0 siblings, 1 reply; 18+ messages in thread
From: Dave Young @ 2017-04-27  5:44 UTC (permalink / raw)
  To: xlpang
  Cc: Juergen Gross, linux-s390, linux-ia64, Baoquan He, Petr Tesarik,
	kexec, linux-kernel, Eric Biederman, Hari Bathini, akpm,
	Michael Holzheu

Hi Xunlei,

On 04/27/17 at 01:25pm, Xunlei Pang wrote:
> On 04/27/2017 at 11:06 AM, Dave Young wrote:
> > [snip]
> >>>>>  
> >>>>>  static int __init crash_save_vmcoreinfo_init(void)
> >>>>>  {
> >>>>> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
> >>>> Can we add a comment in the VMCOREINFO_BYTES header file about the one
> >>>> page assumption?
> >>>>
> >>>> Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096
> >>> Yes, I considered this before, but VMCOREINFO_BYTES is also used by VMCOREINFO_NOTE_SIZE
> >>> definition which is exported to sysfs, also some platform has larger page size(64KB), so
> >>> I didn't touch this 4096 value.
> >>>
> >>> I think I should use kmalloc() to allocate both of them, then move this comment to Patch3 
> >>> kimage_crash_copy_vmcoreinfo().
> >> But on the other hand, using a separate page for them seems safer compared with
> >> using frequently-used slab, what's your opinion?
> > I feel current page based way is better.
> >
> > For 64k page the vmcore note size will increase it seems fine. Do you
> > have concern in mind?
> 
> Since tools are supposed to acquire vmcoreinfo note size from sysfs, it should be safe to do so,
> except that there is some waste in memory for larger PAGE_SIZE.

Either way is fine to me, I think it is up to your implementation, if
choose page alloc then modify the macro with PAGE_SIZE looks better.

Thanks
Dave

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
  2017-04-27  5:44           ` Dave Young
@ 2017-04-27  6:08             ` Xunlei Pang
  0 siblings, 0 replies; 18+ messages in thread
From: Xunlei Pang @ 2017-04-27  6:08 UTC (permalink / raw)
  To: Dave Young, xlpang
  Cc: Juergen Gross, linux-s390, linux-ia64, Baoquan He, Petr Tesarik,
	kexec, linux-kernel, Eric Biederman, Hari Bathini, akpm,
	Michael Holzheu

On 04/27/2017 at 01:44 PM, Dave Young wrote:
> Hi Xunlei,
>
> On 04/27/17 at 01:25pm, Xunlei Pang wrote:
>> On 04/27/2017 at 11:06 AM, Dave Young wrote:
>>> [snip]
>>>>>>>  
>>>>>>>  static int __init crash_save_vmcoreinfo_init(void)
>>>>>>>  {
>>>>>>> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
>>>>>> Can we add a comment in the VMCOREINFO_BYTES header file about the one
>>>>>> page assumption?
>>>>>>
>>>>>> Or just define the VMCOREINFO_BYTES as PAGE_SIZE instead of 4096
>>>>> Yes, I considered this before, but VMCOREINFO_BYTES is also used by VMCOREINFO_NOTE_SIZE
>>>>> definition which is exported to sysfs, also some platform has larger page size(64KB), so
>>>>> I didn't touch this 4096 value.
>>>>>
>>>>> I think I should use kmalloc() to allocate both of them, then move this comment to Patch3 
>>>>> kimage_crash_copy_vmcoreinfo().
>>>> But on the other hand, using a separate page for them seems safer compared with
>>>> using frequently-used slab, what's your opinion?
>>> I feel current page based way is better.
>>>
>>> For 64k page the vmcore note size will increase it seems fine. Do you
>>> have concern in mind?
>> Since tools are supposed to acquire vmcoreinfo note size from sysfs, it should be safe to do so,
>> except that there is some waste in memory for larger PAGE_SIZE.
> Either way is fine to me, I think it is up to your implementation, if
> choose page alloc then modify the macro with PAGE_SIZE looks better.

OK, I will use PAGE_SIZE then, thanks for your comments.

>
> Thanks
> Dave
>
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr
  2017-04-26  7:11   ` Dave Young
@ 2017-04-27  7:38     ` Mahesh Jagannath Salgaonkar
  0 siblings, 0 replies; 18+ messages in thread
From: Mahesh Jagannath Salgaonkar @ 2017-04-27  7:38 UTC (permalink / raw)
  To: Dave Young, Xunlei Pang
  Cc: Baoquan He, kexec, Petr Tesarik, linux-kernel, Eric Biederman,
	Hari Bathini, akpm, linuxppc-dev

On 04/26/2017 12:41 PM, Dave Young wrote:
> Ccing ppc list
> On 04/20/17 at 07:39pm, Xunlei Pang wrote:
>> vmcoreinfo_max_size stands for the vmcoreinfo_data, the
>> correct one we should use is vmcoreinfo_note whose total
>> size is VMCOREINFO_NOTE_SIZE.
>>
>> Like explained in commit 77019967f06b ("kdump: fix exported
>> size of vmcoreinfo note"), it should not affect the actual
>> function, but we better fix it, also this change should be
>> safe and backward compatible.
>>
>> After this, we can get rid of variable vmcoreinfo_max_size,
>> let's use the corresponding macros directly, fewer variables
>> means more safety for vmcoreinfo operation.
>>
>> Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
>> Signed-off-by: Xunlei Pang <xlpang@redhat.com>

Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
-Mahesh.

>> ---
>> v3->v4:
>> -Rebased on the latest linux-next
>>
>>  arch/powerpc/kernel/fadump.c | 3 +--
>>  include/linux/crash_core.h   | 1 -
>>  kernel/crash_core.c          | 3 +--
>>  3 files changed, 2 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
>> index 466569e..7bd6cd0 100644
>> --- a/arch/powerpc/kernel/fadump.c
>> +++ b/arch/powerpc/kernel/fadump.c
>> @@ -893,8 +893,7 @@ static int fadump_create_elfcore_headers(char *bufp)
>>  
>>  	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
>>  	phdr->p_offset	= phdr->p_paddr;
>> -	phdr->p_memsz	= vmcoreinfo_max_size;
>> -	phdr->p_filesz	= vmcoreinfo_max_size;
>> +	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
>>  
>>  	/* Increment number of program headers. */
>>  	(elf->e_phnum)++;
>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>> index ba283a2..7d6bc7b 100644
>> --- a/include/linux/crash_core.h
>> +++ b/include/linux/crash_core.h
>> @@ -55,7 +55,6 @@
>>  
>>  extern u32 *vmcoreinfo_note;
>>  extern size_t vmcoreinfo_size;
>> -extern size_t vmcoreinfo_max_size;
>>  
>>  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
>>  			  void *data, size_t data_len);
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index 0321f04..43cdb00 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -16,7 +16,6 @@
>>  /* vmcoreinfo stuff */
>>  static unsigned char *vmcoreinfo_data;
>>  size_t vmcoreinfo_size;
>> -size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
>>  u32 *vmcoreinfo_note;
>>  
>>  /*
>> @@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>>  	r = vscnprintf(buf, sizeof(buf), fmt, args);
>>  	va_end(args);
>>  
>> -	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
>> +	r = min(r, VMCOREINFO_BYTES - vmcoreinfo_size);
>>  
>>  	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
>>  
>> -- 
>> 1.8.3.1
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
> 
> Reviewed-by: Dave Young <dyoung@redhat.com>
> 
> Thanks
> Dave
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-04-27  7:39 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-20 11:39 [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
2017-04-20 11:39 ` [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
2017-04-21  0:55   ` Xunlei Pang
2017-04-26  7:11   ` Dave Young
2017-04-27  7:38     ` Mahesh Jagannath Salgaonkar
2017-04-20 11:39 ` [PATCH v4 3/3] kdump: Protect vmcoreinfo data under the crash memory Xunlei Pang
2017-04-26  7:09   ` Dave Young
2017-04-26  9:21     ` Xunlei Pang
2017-04-27  2:56       ` Dave Young
2017-04-20 14:24 ` [PATCH v4 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Juergen Gross
2017-04-24 15:18 ` Michael Holzheu
2017-04-26  7:19 ` Dave Young
2017-04-26  9:51   ` Xunlei Pang
2017-04-26 10:18     ` Xunlei Pang
2017-04-27  3:06       ` Dave Young
2017-04-27  5:25         ` Xunlei Pang
2017-04-27  5:44           ` Dave Young
2017-04-27  6:08             ` Xunlei Pang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).