All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
@ 2017-02-07 12:38 Hari Bathini
  2017-02-07 12:38 ` [PATCH v2 2/2] fadump: update documentation about introduction of handover area Hari Bathini
  2017-04-07  1:54 ` [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Michael Ellerman
  0 siblings, 2 replies; 19+ messages in thread
From: Hari Bathini @ 2017-02-07 12:38 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar

In case of fadump, capture (fadump) kernel boots like a normal kernel.
While this has its advantages, the capture kernel would initialize all
the components like normal kernel, which may not necessarily be needed
for a typical dump capture kernel. So, fadump capture kernel ends up
needing more memory than a typical (read kdump) capture kernel to boot.

This can be overcome by introducing parameters like fadump_nr_cpus=1,
similar to nr_cpus=1 parameter, applicable only when fadump is active.
But this approach needs introduction of special parameters applicable
only when fadump is active (capture kernel), for every parameter that
reduces memory/resource consumption.

A better approach would be to pass extra parameters to fadump capture
kernel. As firmware leaves the memory contents intact from the time of
crash till the new kernel is booted up, parameters to append to capture
kernel can be saved in real memory region and retrieved later when the
capture kernel is in its early boot process for appending to command
line parameters.

This patch introduces a new node /sys/kernel/fadump_cmdline_append to
specify the parameters to pass to fadump capture kernel, saves them in
real memory region and appends these parameters to capture kernel early
in its boot process.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---

Changes from v1:
* Not changing dump format version to keep compatibility intact. Using
  start and end markers instead, to check sanity of handover area.
* Checking for memory overlap with current kernel before setting up
  a handover area.


 arch/powerpc/include/asm/fadump.h |   31 +++++++
 arch/powerpc/kernel/fadump.c      |  158 +++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/prom.c        |   22 +++++
 3 files changed, 210 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 0031806..e6b3dc0 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -24,6 +24,8 @@
 
 #ifdef CONFIG_FA_DUMP
 
+#include <asm/setup.h>
+
 /*
  * The RMA region will be saved for later dumping when kernel crashes.
  * RMA is Real Mode Area, the first block of logical memory address owned
@@ -126,6 +128,13 @@ struct fw_dump {
 	/* cmd line option during boot */
 	unsigned long	reserve_bootvar;
 
+	/*
+	 * Area to pass info to capture (fadump) kernel. For now,
+	 * we are only passing parameters to append.
+	 */
+	unsigned long	handover_area_start;
+	unsigned long	handover_area_size;
+
 	unsigned long	fadumphdr_addr;
 	unsigned long	cpu_notes_buf;
 	unsigned long	cpu_notes_buf_size;
@@ -159,6 +168,27 @@ static inline u64 str_to_u64(const char *str)
 #define FADUMP_CRASH_INFO_MAGIC		STR_TO_HEX("FADMPINF")
 #define REGSAVE_AREA_MAGIC		STR_TO_HEX("REGSAVE")
 
+/*
+ * Start address for the area to pass off certain configuration details
+ * like parameters to append to the commandline for a capture (fadump) kernel.
+ * Will refer to this area as handover area henceforth. Setting start address
+ * of handover area to 128MB as this area needs to be accessed in realmode.
+ */
+#define FADUMP_HANDOVER_AREA_START	(1UL << 27)
+#define FADUMP_HANDOVER_AREA_SIZE	(sizeof(struct fadump_handover_info) \
+					 + H_END_MARKER_SIZE)
+
+#define H_AREA_START_MARKER		STR_TO_HEX("HDRSTART")
+#define H_AREA_END_MARKER		STR_TO_HEX("HOVEREND")
+#define H_END_MARKER_SIZE		8
+
+/* config info to be passed to capture kernel */
+struct fadump_handover_info {
+	u64		start_marker;
+	u64		size;
+	char		params[COMMAND_LINE_SIZE/2];
+};
+
 /* The firmware-assisted dump format.
  *
  * The register save area is an area in the partition's memory used to preserve
@@ -200,6 +230,7 @@ struct fad_crash_memory_ranges {
 
 extern int early_init_dt_scan_fw_dump(unsigned long node,
 		const char *uname, int depth, void *data);
+extern char *get_fadump_parameters_realmode(void);
 extern int fadump_reserve_mem(void);
 extern int setup_fadump(void);
 extern int is_fadump_active(void);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8f0c7c5..eab26e9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -41,7 +41,6 @@
 #include <asm/rtas.h>
 #include <asm/fadump.h>
 #include <asm/debug.h>
-#include <asm/setup.h>
 
 static struct fw_dump fw_dump;
 static struct fadump_mem_struct fdm;
@@ -51,6 +50,8 @@ static DEFINE_MUTEX(fadump_mutex);
 struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
 int crash_mem_ranges;
 
+extern char _stext[], _end[];
+
 /* Scan the Firmware Assisted dump configuration details. */
 int __init early_init_dt_scan_fw_dump(unsigned long node,
 			const char *uname, int depth, void *data)
@@ -74,6 +75,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
 	fw_dump.fadump_supported = 1;
 	fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
 
+	fw_dump.handover_area_start = FADUMP_HANDOVER_AREA_START;
+	fw_dump.handover_area_size = PAGE_ALIGN(FADUMP_HANDOVER_AREA_SIZE);
+
 	/*
 	 * The 'ibm,kernel-dump' rtas node is present only if there is
 	 * dump data waiting for us.
@@ -253,6 +257,34 @@ static unsigned long get_fadump_area_size(void)
 	return size;
 }
 
+static char *get_fadump_params_buf(struct fadump_handover_info *h_info)
+{
+	char *params = NULL;
+	u64 *end_marker;
+
+	if (h_info->start_marker == H_AREA_START_MARKER) {
+		end_marker = (u64 *)h_info;
+		end_marker += ((h_info->size - H_END_MARKER_SIZE) /
+			       sizeof(*end_marker));
+		if (*end_marker == H_AREA_END_MARKER)
+			params = h_info->params;
+	}
+
+	return params;
+}
+
+char * __init get_fadump_parameters_realmode(void)
+{
+	char *params = NULL;
+	struct fadump_handover_info *h_info =
+		(struct fadump_handover_info *)fw_dump.handover_area_start;
+
+	if (fdm_active)
+		params = get_fadump_params_buf(h_info);
+
+	return params;
+}
+
 int __init fadump_reserve_mem(void)
 {
 	unsigned long base, size, memory_boundary;
@@ -285,6 +317,7 @@ int __init fadump_reserve_mem(void)
 	 */
 	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
 		size = get_fadump_area_size();
+		size += fw_dump.handover_area_size;
 		if ((memory_limit + size) < memblock_end_of_DRAM())
 			memory_limit += size;
 		else
@@ -297,6 +330,28 @@ int __init fadump_reserve_mem(void)
 	else
 		memory_boundary = memblock_end_of_DRAM();
 
+	/*
+	 * Ensure that the memory marked for handover area doesn't overlap
+	 * with current kernel and reserve it to pass config info like
+	 * parameters to append to capture (fadump) kernel.
+	 */
+	base = __pa(_stext);
+	size = _end - _stext;
+	if (((base + size) > fw_dump.handover_area_start) && (base <=
+	    (fw_dump.handover_area_start + fw_dump.handover_area_size - 1))) {
+		/*
+		 * If the area marked for handover area overlaps with
+		 * current kernel, avoid using a handover area.
+		 */
+		fw_dump.handover_area_size = 0;
+	} else {
+		memblock_reserve(fw_dump.handover_area_start,
+				 fw_dump.handover_area_size);
+		pr_info("Reserved %lu bytes at 0x%lx for passing certain config"
+			" info to capture kernel\n", fw_dump.handover_area_size,
+			fw_dump.handover_area_start);
+	}
+
 	if (fw_dump.dump_active) {
 		printk(KERN_INFO "Firmware-assisted dump is active.\n");
 		/*
@@ -926,6 +981,23 @@ static int fadump_create_elfcore_headers(char *bufp)
 	return 0;
 }
 
+static void init_fadump_handover_area(void)
+{
+	struct fadump_handover_info *h_info;
+	u64 *end_marker;
+
+	if (fw_dump.handover_area_size == 0)
+		return;
+
+	h_info = __va(fw_dump.handover_area_start);
+	memset(h_info, 0, fw_dump.handover_area_size);
+	h_info->start_marker = H_AREA_START_MARKER;
+	h_info->size = fw_dump.handover_area_size;
+	end_marker = (u64 *)h_info;
+	end_marker += (h_info->size - H_END_MARKER_SIZE)/sizeof(*end_marker);
+	*end_marker = H_AREA_END_MARKER;
+}
+
 static unsigned long init_fadump_header(unsigned long addr)
 {
 	struct fadump_crash_info_header *fdh;
@@ -1137,6 +1209,14 @@ static ssize_t fadump_register_show(struct kobject *kobj,
 	return sprintf(buf, "%d\n", fw_dump.dump_registered);
 }
 
+static ssize_t fadump_params_show(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   char *buf)
+{
+	return sprintf(buf, "%s\n",
+		get_fadump_params_buf(__va(fw_dump.handover_area_start)));
+}
+
 static ssize_t fadump_register_store(struct kobject *kobj,
 					struct kobj_attribute *attr,
 					const char *buf, size_t count)
@@ -1175,6 +1255,69 @@ static ssize_t fadump_register_store(struct kobject *kobj,
 	return ret < 0 ? ret : count;
 }
 
+static ssize_t fadump_params_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int i, ret;
+	bool is_truncated = false;
+	char *ptr;
+	size_t size;
+	struct fadump_handover_info *h_info;
+	char *h_params_buf;
+
+	h_info = __va(fw_dump.handover_area_start);
+	h_params_buf = get_fadump_params_buf(h_info);
+	size = sizeof(h_info->params) - 1;
+
+	if (!fw_dump.fadump_enabled || fdm_active || !h_params_buf)
+		return -EPERM;
+
+	mutex_lock(&fadump_mutex);
+
+	ret = count;
+
+	/*
+	 * Passing 'fadump=' here is counter-intuitive.
+	 * So, throw an error when that happens.
+	 */
+	ptr = strstr(buf, "fadump=");
+	if (ptr) {
+		pr_err("'fadump=' parameter not supported here.\n");
+		ret = -EINVAL;
+		goto unlock_out;
+	}
+
+	memset(h_params_buf, 0, (size + 1));
+
+	/* Proceed only if something worthwhile is passed */
+	for (i = 0; (i < count) && (buf[i] == ' '); i++);
+	if ((i == count) || (buf[i] == '\n'))
+		goto unlock_out;
+
+	if (buf[0] != ' ') {
+		h_params_buf[0] = ' ';
+		size--;
+	}
+
+	if (count > size) {
+		is_truncated = true;
+		count = size;
+	}
+
+	strncat(h_params_buf, buf, count);
+	size = strlen(h_params_buf);
+	if (size && h_params_buf[size-1] == '\n')
+		h_params_buf[size-1] = 0;
+
+	if (is_truncated)
+		pr_warn("Modified: %s\n", h_params_buf);
+
+unlock_out:
+	mutex_unlock(&fadump_mutex);
+	return ret;
+}
+
 static int fadump_region_show(struct seq_file *m, void *private)
 {
 	const struct fadump_mem_struct *fdm_ptr;
@@ -1245,6 +1388,10 @@ static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
 						0644, fadump_register_show,
 						fadump_register_store);
+static struct
+kobj_attribute fadump_cmdline_append_attr = __ATTR(fadump_cmdline_append,
+						   0644, fadump_params_show,
+						   fadump_params_store);
 
 static int fadump_region_open(struct inode *inode, struct file *file)
 {
@@ -1273,6 +1420,14 @@ static void fadump_init_files(void)
 		printk(KERN_ERR "fadump: unable to create sysfs file"
 			" fadump_registered (%d)\n", rc);
 
+	if (fw_dump.handover_area_size) {
+		rc = sysfs_create_file(kernel_kobj,
+				       &fadump_cmdline_append_attr.attr);
+		if (rc)
+			pr_err("unable to create sysfs file "
+			       "fadump_cmdline_append (%d)\n", rc);
+	}
+
 	debugfs_file = debugfs_create_file("fadump_region", 0444,
 					powerpc_debugfs_root, NULL,
 					&fadump_region_fops);
@@ -1319,6 +1474,7 @@ int __init setup_fadump(void)
 	/* Initialize the kernel dump memory structure for FAD registration. */
 	else if (fw_dump.reserve_dump_area_size)
 		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+	init_fadump_handover_area();
 	fadump_init_files();
 
 	return 1;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f5d399e..c0cab4c 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -683,6 +683,28 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
+#ifdef CONFIG_FA_DUMP
+	if (is_fadump_active()) {
+		size_t len = 0;
+		char *fadump_params = get_fadump_parameters_realmode();
+
+		if (fadump_params)
+			len = strlen(fadump_params);
+
+		/* Parameters to append to capture (fadump) kernel */
+		if (len) {
+			size_t boot_cmdline_len = strlen(boot_command_line);
+
+			if ((boot_cmdline_len + len) > COMMAND_LINE_SIZE)
+				len = COMMAND_LINE_SIZE - boot_cmdline_len - 1;
+
+			strncat(boot_command_line, fadump_params, len);
+			printk(KERN_INFO "fadump: appending parameters meant "
+			       "for capture kernel.\nModified cmdline: %s\n",
+			       boot_command_line);
+		}
+	}
+#endif
 	parse_early_param();
 
 	/* make sure we've parsed cmdline for mem= before this */

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v2 2/2] fadump: update documentation about introduction of handover area
  2017-02-07 12:38 [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Hari Bathini
@ 2017-02-07 12:38 ` Hari Bathini
  2017-04-07  1:54 ` [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Michael Ellerman
  1 sibling, 0 replies; 19+ messages in thread
From: Hari Bathini @ 2017-02-07 12:38 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar

Update documentation about introduction of handover area that includes
configuration details like extra parameters to append to capture
kernel.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
 Documentation/powerpc/firmware-assisted-dump.txt |   83 ++++++++++++++--------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index 3007bc9..6c6a0e9 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -67,11 +67,17 @@ as follows:
 
 -- The freshly booted kernel will notice that there is a new
    node (ibm,dump-kernel) in the device tree, indicating that
-   there is crash data available from a previous boot. During
-   the early boot OS will reserve rest of the memory above
-   boot memory size effectively booting with restricted memory
-   size. This will make sure that the second kernel will not
-   touch any of the dump memory area.
+   there is crash data available from a previous boot. This
+   second kernel, where crash data is available from previous
+   boot, is referred to as capture kernel. The capture kernel,
+   early during the boot process, looks for a handover area
+   (see fig. 2), saved by the first kernel to be handed over
+   to it. This handover area contains certain config info like
+   extra parameters to append to the capture kernel. The capture
+   kernel applies this configuration accordingly. Later, reserves
+   rest of the memory above boot memory size effectively booting
+   with restricted memory size. This will make sure that the
+   capture kernel will not touch any of the dump memory area.
 
 -- User-space tools will read /proc/vmcore to obtain the contents
    of memory, which holds the previous crashed kernel dump in ELF
@@ -113,15 +119,18 @@ crash does occur.
 
   o Memory Reservation during first kernel
 
-  Low memory                                        Top of memory
-  0      boot memory size                                       |
-  |           |                       |<--Reserved dump area -->|
-  V           V                       |   Permanent Reservation V
-  +-----------+----------/ /----------+---+----+-----------+----+
-  |           |                       |CPU|HPTE|  DUMP     |ELF |
-  +-----------+----------/ /----------+---+----+-----------+----+
-        |                                           ^
-        |                                           |
+  Low memory                                              Top of memory
+  0                                                               |
+  | Handover area                                                 |
+  |   |                                                           |
+  |   |      boot memory size           |<-- Reserved dump area-->|
+  |   |          |                      |  Permanent Reservation  |
+  V   V          V                                                V
+  +--+-+--------+----------/ /----------+---+----+-----------+----+
+  |  | |        |                       |CPU|HPTE|  DUMP     |ELF |
+  +--+-+--------+----------/ /----------+---+----+-----------+----+
+  |____  _______|                                   ^
+       \/                                           |
         \                                           /
          -------------------------------------------
           Boot memory content gets transferred to
@@ -129,18 +138,21 @@ crash does occur.
           crash
                    Fig. 1
 
-  o Memory Reservation during second kernel after crash
-
-  Low memory                                        Top of memory
-  0      boot memory size                                       |
-  |           |<------------- Reserved dump area ----------- -->|
-  V           V                                                 V
-  +-----------+----------/ /----------+---+----+-----------+----+
-  |           |                       |CPU|HPTE|  DUMP     |ELF |
-  +-----------+----------/ /----------+---+----+-----------+----+
-        |                                                    |
-        V                                                    V
-   Used by second                                    /proc/vmcore
+  o Memory Reservation during capture (fadump) kernel after crash
+
+  Low memory                                          Top of memory
+  0                                                               |
+  |  Handover area                                                |
+  |   |                                                           |
+  |   |    boot memory size                                       |
+  |   |         |<------------- Reserved dump area ----------- -->|
+  V   V         V                                                 V
+  +--+-+--------+----------/ /----------+---+----+-----------+----+
+  |  | |        |                       |CPU|HPTE|  DUMP     |ELF |
+  +--+-+--------+----------/ /----------+---+----+-----------+----+
+  |____  _______|                                            |
+       \/                                                    V
+   Used by capture                                      /proc/vmcore
    kernel to boot
                    Fig. 2
 
@@ -196,10 +208,21 @@ Here is the list of files under kernel sysfs:
     be handled and vmcore will not be captured. This interface can be
     easily integrated with kdump service start/stop.
 
+ /sys/kernel/fadump_cmdline_append
+
+    This is used to specify the extra parameters to append to capture
+    kernel. Typically used to pass parameters that reduce memory/resource
+    consumption for dump capture kernel. To pass parameters to append to
+    dump capture kernel:
+
+    echo "nr_cpus=1 kvm_cma_resv_ratio=0" > /sys/kernel/fadump_cmdline_append
+
+    This interface can be easily integrated with kdump service start/stop.
+
  /sys/kernel/fadump_release_mem
 
     This file is available only when fadump is active during
-    second kernel. This is used to release the reserved memory
+    capture kernel. This is used to release the reserved memory
     region that are held for saving crash dump. To release the
     reserved memory echo 1 to it:
 
@@ -230,7 +253,7 @@ Here is the list of files under powerpc debugfs:
     HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
     DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
 
-    Contents when fadump is active during second kernel
+    Contents when fadump is active during capture kernel
 
     # cat /sys/kernel/debug/powerpc/fadump_region
     CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
@@ -249,8 +272,8 @@ TODO:
    boot successfully when booted with restricted memory.
  o The fadump implementation introduces a fadump crash info structure
    in the scratch area before the ELF core header. The idea of introducing
-   this structure is to pass some important crash info data to the second
-   kernel which will help second kernel to populate ELF core header with
+   this structure is to pass some important crash info data to the capture
+   kernel which will help capture kernel to populate ELF core header with
    correct data before it gets exported through /proc/vmcore. The current
    design implementation does not address a possibility of introducing
    additional fields (in future) to this structure without affecting

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-02-07 12:38 [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Hari Bathini
  2017-02-07 12:38 ` [PATCH v2 2/2] fadump: update documentation about introduction of handover area Hari Bathini
@ 2017-04-07  1:54 ` Michael Ellerman
  2017-04-07  7:24   ` Hari Bathini
  1 sibling, 1 reply; 19+ messages in thread
From: Michael Ellerman @ 2017-04-07  1:54 UTC (permalink / raw)
  To: Hari Bathini; +Cc: linuxppc-dev, Mahesh J Salgaonkar

Hari Bathini <hbathini@linux.vnet.ibm.com> writes:

> In case of fadump, capture (fadump) kernel boots like a normal kernel.
> While this has its advantages, the capture kernel would initialize all
> the components like normal kernel, which may not necessarily be needed
> for a typical dump capture kernel. So, fadump capture kernel ends up
> needing more memory than a typical (read kdump) capture kernel to boot.
>
> This can be overcome by introducing parameters like fadump_nr_cpus=1,
> similar to nr_cpus=1 parameter, applicable only when fadump is active.
> But this approach needs introduction of special parameters applicable
> only when fadump is active (capture kernel), for every parameter that
> reduces memory/resource consumption.
>
> A better approach would be to pass extra parameters to fadump capture
> kernel. As firmware leaves the memory contents intact from the time of
> crash till the new kernel is booted up, parameters to append to capture
> kernel can be saved in real memory region and retrieved later when the
> capture kernel is in its early boot process for appending to command
> line parameters.
>
> This patch introduces a new node /sys/kernel/fadump_cmdline_append to
> specify the parameters to pass to fadump capture kernel, saves them in
> real memory region and appends these parameters to capture kernel early
> in its boot process.

As we discussed on IRC I don't really like this.

It's clever, (ab)using the fact that the first kernel's memory is left
intact. But it's also a bit gross :)

It also has a few real problems, like hard coding 128MB as the handover
location. You may not have memory there, or it may be reserved.


My preference would be that the fadump kernel "just works". If it's
using too much memory then the fadump kernel should do whatever it needs
to use less memory, eg. shrinking nr_cpu_ids etc.

Do we actually know *why* the fadump kernel is running out of memory?
Obviously large numbers of CPUs is one of the main drivers (lots of
stacks required). But other than that what is causing the memory
pressure? I would like some data on that before we proceed.


If we *must* have a way to pass command line arguments to the fadump
kernel then I think we should just use a command line argument that
specifies them.

eg: fadump_append=nr_cpus=1,use_less_memory,some_other_obscure_parameter=100


cheers

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-07  1:54 ` [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Michael Ellerman
@ 2017-04-07  7:24   ` Hari Bathini
  2017-04-07  9:06     ` Hari Bathini
  2017-04-07 13:46     ` Michael Ellerman
  0 siblings, 2 replies; 19+ messages in thread
From: Hari Bathini @ 2017-04-07  7:24 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar

Hi Michael,


On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:
> Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
>
>> In case of fadump, capture (fadump) kernel boots like a normal kernel.
>> While this has its advantages, the capture kernel would initialize all
>> the components like normal kernel, which may not necessarily be needed
>> for a typical dump capture kernel. So, fadump capture kernel ends up
>> needing more memory than a typical (read kdump) capture kernel to boot.
>>
>> This can be overcome by introducing parameters like fadump_nr_cpus=1,
>> similar to nr_cpus=1 parameter, applicable only when fadump is active.
>> But this approach needs introduction of special parameters applicable
>> only when fadump is active (capture kernel), for every parameter that
>> reduces memory/resource consumption.
>>
>> A better approach would be to pass extra parameters to fadump capture
>> kernel. As firmware leaves the memory contents intact from the time of
>> crash till the new kernel is booted up, parameters to append to capture
>> kernel can be saved in real memory region and retrieved later when the
>> capture kernel is in its early boot process for appending to command
>> line parameters.
>>
>> This patch introduces a new node /sys/kernel/fadump_cmdline_append to
>> specify the parameters to pass to fadump capture kernel, saves them in
>> real memory region and appends these parameters to capture kernel early
>> in its boot process.
> As we discussed on IRC I don't really like this.
>
> It's clever, (ab)using the fact that the first kernel's memory is left
> intact. But it's also a bit gross :)

No doubt. It is an ugly trick :)

> It also has a few real problems, like hard coding 128MB as the handover
> location. You may not have memory there, or it may be reserved.
>

Yeah, there is a chance that appending parameters is not possible
like in the scenarios you mentioned above. My intention behind this
hack is to build on this handover area later to probably pass off a
special intird which brings down the dump capture time and memory
consumption further. But to put it in your words, it would be abusing
it even more :P . So, I would take it as a road not worthing taking..

> My preference would be that the fadump kernel "just works". If it's
> using too much memory then the fadump kernel should do whatever it needs
> to use less memory, eg. shrinking nr_cpu_ids etc.

> Do we actually know *why* the fadump kernel is running out of memory?
> Obviously large numbers of CPUs is one of the main drivers (lots of
> stacks required). But other than that what is causing the memory
> pressure? I would like some data on that before we proceed.

Almost the same amount of memory in comparison with the memory
required to boot the production kernel but that is unwarranted for fadump
(dump capture) kernel. Let's say the production kernel is configured for
memory cgroups or hugepages which is not required in a dump capture kernel
but with no option to say so, we are wasting that much more memory on fadump
and eventually depriving the production kernel of that memory.

So, if parameters like cgroup_disable=memory, transparent_hugepages=never,
numa=off, nr_cpus=1, etc.. are passed to fadump (dump capture) kernel it 
would
be beneficial. Not to mention any future additions to the kernel that 
increase the
footprint of a production kernel..

> If we *must* have a way to pass command line arguments to the fadump
> kernel then I think we should just use a command line argument that
> specifies them.
>
> eg: fadump_append=nr_cpus=1,use_less_memory,some_other_obscure_parameter=100
>
>

Hmmm.. this sounds like a better interface. But I would like know your 
preference on
how to process fadump_append parameter:

1. Modify cmdline early in fadump kernel boot process (before parsing 
parameters) to change
    fadump_append="nr_cpus=1 cgroup_disable=memory" in cmdline to 
"nr_cpus=1 cgroup_disable=memory"
    so that fadump doesn't have to bother about processing this 
parameters later.
2. A parse function in fadump to parse fadump_append parameters. A 
function similar to parse_early_param()
    meant for fadump_append parameter alone..
3. fadump code processes fadump_append for each parameter passed in it.

The third one sounds like a nightmare to me as we need to make fadump 
code aware of every new parameter
we want to enforce on fadump..

Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-07  7:24   ` Hari Bathini
@ 2017-04-07  9:06     ` Hari Bathini
  2017-04-07 13:46     ` Michael Ellerman
  1 sibling, 0 replies; 19+ messages in thread
From: Hari Bathini @ 2017-04-07  9:06 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar



On Friday 07 April 2017 12:54 PM, Hari Bathini wrote:
> Hi Michael,
>
>
> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:
>> Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
>>
>>> In case of fadump, capture (fadump) kernel boots like a normal kernel.
>>> While this has its advantages, the capture kernel would initialize all
>>> the components like normal kernel, which may not necessarily be needed
>>> for a typical dump capture kernel. So, fadump capture kernel ends up
>>> needing more memory than a typical (read kdump) capture kernel to boot.
>>>
>>> This can be overcome by introducing parameters like fadump_nr_cpus=1,
>>> similar to nr_cpus=1 parameter, applicable only when fadump is active.
>>> But this approach needs introduction of special parameters applicable
>>> only when fadump is active (capture kernel), for every parameter that
>>> reduces memory/resource consumption.
>>>
>>> A better approach would be to pass extra parameters to fadump capture
>>> kernel. As firmware leaves the memory contents intact from the time of
>>> crash till the new kernel is booted up, parameters to append to capture
>>> kernel can be saved in real memory region and retrieved later when the
>>> capture kernel is in its early boot process for appending to command
>>> line parameters.
>>>
>>> This patch introduces a new node /sys/kernel/fadump_cmdline_append to
>>> specify the parameters to pass to fadump capture kernel, saves them in
>>> real memory region and appends these parameters to capture kernel early
>>> in its boot process.
>> As we discussed on IRC I don't really like this.
>>
>> It's clever, (ab)using the fact that the first kernel's memory is left
>> intact. But it's also a bit gross :)
>
> No doubt. It is an ugly trick :)
>
>> It also has a few real problems, like hard coding 128MB as the handover
>> location. You may not have memory there, or it may be reserved.
>>
>
> Yeah, there is a chance that appending parameters is not possible
> like in the scenarios you mentioned above. My intention behind this
> hack is to build on this handover area later to probably pass off a
> special intird which brings down the dump capture time and memory
> consumption further. But to put it in your words, it would be abusing
> it even more :P . So, I would take it as a road not worthing taking..
>
>> My preference would be that the fadump kernel "just works". If it's
>> using too much memory then the fadump kernel should do whatever it needs
>> to use less memory, eg. shrinking nr_cpu_ids etc.
>
>> Do we actually know *why* the fadump kernel is running out of memory?
>> Obviously large numbers of CPUs is one of the main drivers (lots of
>> stacks required). But other than that what is causing the memory
>> pressure? I would like some data on that before we proceed.
>
> Almost the same amount of memory in comparison with the memory
> required to boot the production kernel but that is unwarranted for fadump
> (dump capture) kernel. Let's say the production kernel is configured for
> memory cgroups or hugepages which is not required in a dump capture 
> kernel
> but with no option to say so, we are wasting that much more memory on 
> fadump
> and eventually depriving the production kernel of that memory.
>
> So, if parameters like cgroup_disable=memory, 
> transparent_hugepages=never,
> numa=off, nr_cpus=1, etc.. are passed to fadump (dump capture) kernel 
> it would
> be beneficial. Not to mention any future additions to the kernel that 
> increase the
> footprint of a production kernel..
>
>> If we *must* have a way to pass command line arguments to the fadump
>> kernel then I think we should just use a command line argument that
>> specifies them.
>>
>> eg: 
>> fadump_append=nr_cpus=1,use_less_memory,some_other_obscure_parameter=100
>>
>>
>
> Hmmm.. this sounds like a better interface. But I would like know your 
> preference on
> how to process fadump_append parameter:
>
> 1. Modify cmdline early in fadump kernel boot process (before parsing 
> parameters) to change
>    fadump_append="nr_cpus=1 cgroup_disable=memory" in cmdline to 
> "nr_cpus=1 cgroup_disable=memory"
>    so that fadump doesn't have to bother about processing this 
> parameters later.
> 2. A parse function in fadump to parse fadump_append parameters. A 
> function similar to parse_early_param()
>    meant for fadump_append parameter alone..
> 3. fadump code processes fadump_append for each parameter passed in it.
>
> The third one sounds like a nightmare to me as we need to make fadump 
> code aware of every new parameter
> we want to enforce on fadump..
>

I prefer option 2 for it is simple and cleaner..

Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-07  7:24   ` Hari Bathini
  2017-04-07  9:06     ` Hari Bathini
@ 2017-04-07 13:46     ` Michael Ellerman
  2017-04-07 17:41       ` Hari Bathini
  2017-04-12 20:29       ` Hari Bathini
  1 sibling, 2 replies; 19+ messages in thread
From: Michael Ellerman @ 2017-04-07 13:46 UTC (permalink / raw)
  To: Hari Bathini; +Cc: linuxppc-dev, Mahesh J Salgaonkar

Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:
>> My preference would be that the fadump kernel "just works". If it's
>> using too much memory then the fadump kernel should do whatever it needs
>> to use less memory, eg. shrinking nr_cpu_ids etc.
>
>> Do we actually know *why* the fadump kernel is running out of memory?
>> Obviously large numbers of CPUs is one of the main drivers (lots of
>> stacks required). But other than that what is causing the memory
>> pressure? I would like some data on that before we proceed.
>
> Almost the same amount of memory in comparison with the memory
> required to boot the production kernel but that is unwarranted for fadump
> (dump capture) kernel.

That's not data! :)

The dump kernel is booted with *much* less memory than the production
kernel (that's the whole issue!) and so it doesn't need to create struct
pages for all that memory, which means it should need less memory.

The vfs caches are also sized based on the available memory, so they
should also shrink in the dump kernel.

I want some actual numbers on what's driving the memory usage.

I tried some of these parameters to see how much memory they would save:

> So, if parameters like
> cgroup_disable=memory,

0 bytes saved.

> transparent_hugepages=never,

0 bytes saved.

> numa=off,

64KB saved.

> nr_cpus=1,

3MB saved (vs 16 CPUs)


Now maybe on your system those do save memory for some reason, but
please prove it to me. Otherwise I'm inclined to merge:

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ff0dd4e77a7..03f1f253c372 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -79,8 +79,10 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
 	 * dump data waiting for us.
 	 */
 	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
-	if (fdm_active)
+	if (fdm_active) {
 		fw_dump.dump_active = 1;
+		nr_cpu_ids = 1;
+	}
 
 	/* Get the sizes required to store dump data for the firmware provided
 	 * dump sections.

cheers

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-07 13:46     ` Michael Ellerman
@ 2017-04-07 17:41       ` Hari Bathini
  2017-04-12 20:29       ` Hari Bathini
  1 sibling, 0 replies; 19+ messages in thread
From: Hari Bathini @ 2017-04-07 17:41 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar

Hi Michael,


On Friday 07 April 2017 07:16 PM, Michael Ellerman wrote:
> Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
>> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:
>>> My preference would be that the fadump kernel "just works". If it's
>>> using too much memory then the fadump kernel should do whatever it needs
>>> to use less memory, eg. shrinking nr_cpu_ids etc.
>>> Do we actually know *why* the fadump kernel is running out of memory?
>>> Obviously large numbers of CPUs is one of the main drivers (lots of
>>> stacks required). But other than that what is causing the memory
>>> pressure? I would like some data on that before we proceed.
>> Almost the same amount of memory in comparison with the memory
>> required to boot the production kernel but that is unwarranted for fadump
>> (dump capture) kernel.
> That's not data! :)

I am collating the data. Sorry! I should have mentioned it :)

> The dump kernel is booted with *much* less memory than the production
> kernel (that's the whole issue!) and so it doesn't need to create struct
> pages for all that memory, which means it should need less memory.

What I meant was, if we were to boot production kernel with mem=X, where 
X is
the smallest possible value to boot the kernel without resulting in an 
OOM, fadump
needed nearly the same amount to be reserved for it to capture dump without
hitting an OOM. But this was an observation on system with not so much 
memory.
Will try on a system with large memory and report back with data..

>
> The vfs caches are also sized based on the available memory, so they
> should also shrink in the dump kernel.
>
> I want some actual numbers on what's driving the memory usage.
>
> I tried some of these parameters to see how much memory they would save:
>
>> So, if parameters like
>> cgroup_disable=memory,
> 0 bytes saved.

Interesting.. was CONFIG_MEMCG set on the kernel?

>
>> transparent_hugepages=never,
> 0 bytes saved.

Not surprising unless transparent hugepages were used

>> numa=off,
> 64KB saved.

In the memory starved dump capture environment, every byte counts, I 
guess :)
Also, depends on the numa config?

>> nr_cpus=1,
> 3MB saved (vs 16 CPUs)
>
>
> Now maybe on your system those do save memory for some reason, but
> please prove it to me. Otherwise I'm inclined to merge:
>
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index 8ff0dd4e77a7..03f1f253c372 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -79,8 +79,10 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
>   	 * dump data waiting for us.
>   	 */
>   	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
> -	if (fdm_active)
> +	if (fdm_active) {
>   		fw_dump.dump_active = 1;
> +		nr_cpu_ids = 1;
> +	}
>
>   	/* Get the sizes required to store dump data for the firmware provided
>   	 * dump sections.

Necessary but not sufficient is the point I am trying to make. 
Apparently not
convincing enough. Will try and come back with relevant data :)

Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-07 13:46     ` Michael Ellerman
  2017-04-07 17:41       ` Hari Bathini
@ 2017-04-12 20:29       ` Hari Bathini
  2017-04-13 19:58         ` Michal Suchánek
  1 sibling, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2017-04-12 20:29 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar



On Friday 07 April 2017 07:16 PM, Michael Ellerman wrote:
> Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
>> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:
>>> My preference would be that the fadump kernel "just works". If it's
>>> using too much memory then the fadump kernel should do whatever it needs
>>> to use less memory, eg. shrinking nr_cpu_ids etc.
>>> Do we actually know *why* the fadump kernel is running out of memory?
>>> Obviously large numbers of CPUs is one of the main drivers (lots of
>>> stacks required). But other than that what is causing the memory
>>> pressure? I would like some data on that before we proceed.
>> Almost the same amount of memory in comparison with the memory
>> required to boot the production kernel but that is unwarranted for fadump
>> (dump capture) kernel.
> That's not data! :)
>
> The dump kernel is booted with *much* less memory than the production
> kernel (that's the whole issue!) and so it doesn't need to create struct
> pages for all that memory, which means it should need less memory.
>
> The vfs caches are also sized based on the available memory, so they
> should also shrink in the dump kernel.
>
> I want some actual numbers on what's driving the memory usage.
>
> I tried some of these parameters to see how much memory they would save:

Hi Michael,

Tried to get data to show parameters like numa=off & cgroup_disable=memory
matter too but parameter nr_cpus=1 is making parameters like numa=off,
cgroup_disable=memory insignificant. Also, these parameters not using much
of early memory reservations is making quantification of memory saved for
each of them that much more difficult. But I would still like to argue that
passing additional parameters to fadump is better than enforcing nr_cpus=1
in the kernel for:

   a) With makedumpfile tool supporting multi-threading it would make sense
      to leave the choice of how many CPUs to have, to the user.

   b) Parameters like udev.children-max=2 can help to reduce the number of
      parallel executed events bringing down the memory pressure on fadump
      kernel (when it is booted with more than one CPU).

   c) Ease of maintainability is better (considering any new kernel features
      with some memory to save or stability to gain on disabling, possible
      platform supports) with append approach over enforcing these 
parameters
      in the kernel.

   d) It would give user the flexibility to disable unwanted kernel features
      in fadump kernel (numa=off, cgroup_disable=memory). For every feature
      enabled in the production kernel, fadump kernel will have the 
choice to
      opt out of it, provided there is such cmdline option.

>> So, if parameters like
>> cgroup_disable=memory,
> 0 bytes saved.
>
>> transparent_hugepages=never,
> 0 bytes saved.
>
>> numa=off,
> 64KB saved.
>
>> nr_cpus=1,
> 3MB saved (vs 16 CPUs)
>

Hmmm... On a system with single core and 8GB memory, fadump kernel captures
dump successfully with 272MB passing nr_cpus=1 while it needed 320MB (+48MB)
to do the same without nr_cpus=1. So, while the early reservations saved 
is only a
couple of megabytes, it rubs off further in the boot process to reduce 
memory
consumption by nearly 50MB :)

> Now maybe on your system those do save memory for some reason, but
> please prove it to me. Otherwise I'm inclined to merge:
>
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index 8ff0dd4e77a7..03f1f253c372 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -79,8 +79,10 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
>   	 * dump data waiting for us.
>   	 */
>   	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
> -	if (fdm_active)
> +	if (fdm_active) {
>   		fw_dump.dump_active = 1;
> +		nr_cpu_ids = 1;
> +	}
>
>   	/* Get the sizes required to store dump data for the firmware provided
>   	 * dump sections.
>
>

Based on your suggestion, I am thinking of something like the below:

--
powerpc/fadump: reduce memory consumption for capture kernel

With fadump (dump capture) kernel booting like a regular kernel, it almost
needs the same amount of memory to boot as the production kernel, which is
unwarranted for a dump capture kernel. But with no option to disable some
of the unnecessary subsystems in fadump kernel, that much memory is wasted
on fadump, depriving the production kernel of that memory.

Introduce kernel parameter 'fadump_append=' that would take regular kernel
parameters as a comma separated list, to be enforced when fadump is active.
This 'fadump_append=' parameter can be leveraged to pass parameters like
nr_cpus=1, cgroup_disable=memory and numa=off, to disable unwarranted
resources/subsystems.

Also, ensure the log "Firmware-assisted dump is active" is printed early
in the boot process to put the subsequent fadump messages in context.

Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
  arch/powerpc/kernel/fadump.c |   41 
++++++++++++++++++++++++++++++++++++++---
  1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index e013f8f..c4d4663 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -79,8 +79,10 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
       * dump data waiting for us.
       */
      fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
-    if (fdm_active)
+    if (fdm_active) {
+        pr_info("Firmware-assisted dump is active.\n");
          fw_dump.dump_active = 1;
+    }

      /* Get the sizes required to store dump data for the firmware provided
       * dump sections.
@@ -263,8 +265,12 @@ int __init fadump_reserve_mem(void)
  {
      unsigned long base, size, memory_boundary;

-    if (!fw_dump.fadump_enabled)
+    if (!fw_dump.fadump_enabled) {
+        if (fw_dump.dump_active)
+            pr_warn("Firmware-assisted dump was active but kernel"
+                " booted with fadump disabled!\n");
          return 0;
+    }

      if (!fw_dump.fadump_supported) {
          printk(KERN_INFO "Firmware-assisted dump is not supported on"
@@ -304,7 +310,6 @@ int __init fadump_reserve_mem(void)
          memory_boundary = memblock_end_of_DRAM();

      if (fw_dump.dump_active) {
-        printk(KERN_INFO "Firmware-assisted dump is active.\n");
          /*
           * If last boot has crashed then reserve all the memory
           * above boot_memory_size so that we don't touch it until
@@ -359,6 +364,36 @@ static int __init early_fadump_param(char *p)
  }
  early_param("fadump", early_fadump_param);

+static void __init parse_fadump_append_params(const char *p)
+{
+    static char fadump_cmdline[COMMAND_LINE_SIZE] __initdata;
+    char *token;
+
+    strlcpy(fadump_cmdline, p, COMMAND_LINE_SIZE);
+    token = strchr(fadump_cmdline, ',');
+    while (token) {
+        *token = ' ';
+        token = strchr(token, ',');
+    }
+
+    pr_info("enforcing additional parameters (%s) passed through "
+        "'fadump_append=' parameter\n", fadump_cmdline);
+    parse_early_options(fadump_cmdline);
+}
+
+/* Look for fadump_append= cmdline option. */
+static int __init early_fadump_append_param(char *p)
+{
+    if (!p)
+        return 1;
+
+    if (fw_dump.dump_active)
+        parse_fadump_append_params(p);
+
+    return 0;
+}
+early_param("fadump_append", early_fadump_append_param);
+
  static void register_fw_dump(struct fadump_mem_struct *fdm)
  {
      int rc;


Thanks
Hari

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-12 20:29       ` Hari Bathini
@ 2017-04-13 19:58         ` Michal Suchánek
  2017-04-17 15:13           ` Hari Bathini
  0 siblings, 1 reply; 19+ messages in thread
From: Michal Suchánek @ 2017-04-13 19:58 UTC (permalink / raw)
  To: Hari Bathini; +Cc: Michael Ellerman, linuxppc-dev, Mahesh J Salgaonkar

On Thu, 13 Apr 2017 01:59:13 +0530
Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:

> On Friday 07 April 2017 07:16 PM, Michael Ellerman wrote:
> > Hari Bathini <hbathini@linux.vnet.ibm.com> writes:  
> >> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:  
> >>> My preference would be that the fadump kernel "just works". If
> >>> it's using too much memory then the fadump kernel should do
> >>> whatever it needs to use less memory, eg. shrinking nr_cpu_ids
> >>> etc. Do we actually know *why* the fadump kernel is running out
> >>> of memory? Obviously large numbers of CPUs is one of the main
> >>> drivers (lots of stacks required). But other than that what is
> >>> causing the memory pressure? I would like some data on that
> >>> before we proceed.  
> >> Almost the same amount of memory in comparison with the memory
> >> required to boot the production kernel but that is unwarranted for
> >> fadump (dump capture) kernel.  
> > That's not data! :)
> >
> > The dump kernel is booted with *much* less memory than the
> > production kernel (that's the whole issue!) and so it doesn't need
> > to create struct pages for all that memory, which means it should
> > need less memory.
> >
> > The vfs caches are also sized based on the available memory, so they
> > should also shrink in the dump kernel.
> >
> > I want some actual numbers on what's driving the memory usage.
> >
> > I tried some of these parameters to see how much memory they would
> > save:  
> 
> Hi Michael,
> 
> Tried to get data to show parameters like numa=off &
> cgroup_disable=memory matter too but parameter nr_cpus=1 is making
> parameters like numa=off, cgroup_disable=memory insignificant. Also,
> these parameters not using much of early memory reservations is
> making quantification of memory saved for each of them that much more
> difficult. But I would still like to argue that passing additional
> parameters to fadump is better than enforcing nr_cpus=1 in the kernel
> for:
> 
>    a) With makedumpfile tool supporting multi-threading it would make
> sense to leave the choice of how many CPUs to have, to the user.
> 
>    b) Parameters like udev.children-max=2 can help to reduce the
> number of parallel executed events bringing down the memory pressure
> on fadump kernel (when it is booted with more than one CPU).
> 
>    c) Ease of maintainability is better (considering any new kernel
> features with some memory to save or stability to gain on disabling,
> possible platform supports) with append approach over enforcing these 
> parameters
>       in the kernel.
> 
>    d) It would give user the flexibility to disable unwanted kernel
> features in fadump kernel (numa=off, cgroup_disable=memory). For
> every feature enabled in the production kernel, fadump kernel will
> have the choice to
>       opt out of it, provided there is such cmdline option.

Hello,

can't the extra parameters be passed in the devicetree?

The docs say that the kernel can tell it's a fadump crash kernel by
checking the devicetree ibm,dump-kernel property. Is there any reason
more (optional) properties cannot be added?

Thanks

Michal

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-13 19:58         ` Michal Suchánek
@ 2017-04-17 15:13           ` Hari Bathini
  2017-04-18 14:18             ` Michal Suchánek
  0 siblings, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2017-04-17 15:13 UTC (permalink / raw)
  To: Michal Suchánek; +Cc: Michael Ellerman, linuxppc-dev, Mahesh J Salgaonkar



On Friday 14 April 2017 01:28 AM, Michal Suchánek wrote:
> On Thu, 13 Apr 2017 01:59:13 +0530
> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>
>> On Friday 07 April 2017 07:16 PM, Michael Ellerman wrote:
>>> Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
>>>> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote:
>>>>> My preference would be that the fadump kernel "just works". If
>>>>> it's using too much memory then the fadump kernel should do
>>>>> whatever it needs to use less memory, eg. shrinking nr_cpu_ids
>>>>> etc. Do we actually know *why* the fadump kernel is running out
>>>>> of memory? Obviously large numbers of CPUs is one of the main
>>>>> drivers (lots of stacks required). But other than that what is
>>>>> causing the memory pressure? I would like some data on that
>>>>> before we proceed.
>>>> Almost the same amount of memory in comparison with the memory
>>>> required to boot the production kernel but that is unwarranted for
>>>> fadump (dump capture) kernel.
>>> That's not data! :)
>>>
>>> The dump kernel is booted with *much* less memory than the
>>> production kernel (that's the whole issue!) and so it doesn't need
>>> to create struct pages for all that memory, which means it should
>>> need less memory.
>>>
>>> The vfs caches are also sized based on the available memory, so they
>>> should also shrink in the dump kernel.
>>>
>>> I want some actual numbers on what's driving the memory usage.
>>>
>>> I tried some of these parameters to see how much memory they would
>>> save:
>> Hi Michael,
>>
>> Tried to get data to show parameters like numa=off &
>> cgroup_disable=memory matter too but parameter nr_cpus=1 is making
>> parameters like numa=off, cgroup_disable=memory insignificant. Also,
>> these parameters not using much of early memory reservations is
>> making quantification of memory saved for each of them that much more
>> difficult. But I would still like to argue that passing additional
>> parameters to fadump is better than enforcing nr_cpus=1 in the kernel
>> for:
>>
>>     a) With makedumpfile tool supporting multi-threading it would make
>> sense to leave the choice of how many CPUs to have, to the user.
>>
>>     b) Parameters like udev.children-max=2 can help to reduce the
>> number of parallel executed events bringing down the memory pressure
>> on fadump kernel (when it is booted with more than one CPU).
>>
>>     c) Ease of maintainability is better (considering any new kernel
>> features with some memory to save or stability to gain on disabling,
>> possible platform supports) with append approach over enforcing these
>> parameters
>>        in the kernel.
>>
>>     d) It would give user the flexibility to disable unwanted kernel
>> features in fadump kernel (numa=off, cgroup_disable=memory). For
>> every feature enabled in the production kernel, fadump kernel will
>> have the choice to
>>        opt out of it, provided there is such cmdline option.
> Hello,

Hi Michal,

> can't the extra parameters be passed in the devicetree?

Hmmm.. possible. Without change in f/w, this may not be guaranteed though.

> The docs say that the kernel can tell it's a fadump crash kernel by
> checking the devicetree ibm,dump-kernel property. Is there any reason

This node is exported by firmware

> more (optional) properties cannot be added?

Kernel change seems simple over f/w enhancement..

Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-17 15:13           ` Hari Bathini
@ 2017-04-18 14:18             ` Michal Suchánek
  2017-04-19  4:19               ` Michael Ellerman
  0 siblings, 1 reply; 19+ messages in thread
From: Michal Suchánek @ 2017-04-18 14:18 UTC (permalink / raw)
  To: Hari Bathini; +Cc: Mahesh J Salgaonkar, linuxppc-dev

On Mon, 17 Apr 2017 20:43:02 +0530
Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:

> On Friday 14 April 2017 01:28 AM, Michal Such=C3=A1nek wrote:
> > On Thu, 13 Apr 2017 01:59:13 +0530
> > Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
> > =20
> >> On Friday 07 April 2017 07:16 PM, Michael Ellerman wrote: =20
> >>> Hari Bathini <hbathini@linux.vnet.ibm.com> writes: =20
> >>>> On Friday 07 April 2017 07:24 AM, Michael Ellerman wrote: =20
> >>>>> My preference would be that the fadump kernel "just works". If
> >>>>> it's using too much memory then the fadump kernel should do
> >>>>> whatever it needs to use less memory, eg. shrinking nr_cpu_ids
> >>>>> etc. Do we actually know *why* the fadump kernel is running out
> >>>>> of memory? Obviously large numbers of CPUs is one of the main
> >>>>> drivers (lots of stacks required). But other than that what is
> >>>>> causing the memory pressure? I would like some data on that
> >>>>> before we proceed. =20
> >>>> Almost the same amount of memory in comparison with the memory
> >>>> required to boot the production kernel but that is unwarranted
> >>>> for fadump (dump capture) kernel. =20
> >>> That's not data! :)
> >>>
> >>> The dump kernel is booted with *much* less memory than the
> >>> production kernel (that's the whole issue!) and so it doesn't need
> >>> to create struct pages for all that memory, which means it should
> >>> need less memory.
> >>>
> >>> The vfs caches are also sized based on the available memory, so
> >>> they should also shrink in the dump kernel.
> >>>
> >>> I want some actual numbers on what's driving the memory usage.
> >>>
> >>> I tried some of these parameters to see how much memory they would
> >>> save: =20
> >> Hi Michael,
> >>
> >> Tried to get data to show parameters like numa=3Doff &
> >> cgroup_disable=3Dmemory matter too but parameter nr_cpus=3D1 is making
> >> parameters like numa=3Doff, cgroup_disable=3Dmemory insignificant.
> >> Also, these parameters not using much of early memory reservations
> >> is making quantification of memory saved for each of them that
> >> much more difficult. But I would still like to argue that passing
> >> additional parameters to fadump is better than enforcing nr_cpus=3D1
> >> in the kernel for:
> >>
> >>     a) With makedumpfile tool supporting multi-threading it would
> >> make sense to leave the choice of how many CPUs to have, to the
> >> user.
> >>
> >>     b) Parameters like udev.children-max=3D2 can help to reduce the
> >> number of parallel executed events bringing down the memory
> >> pressure on fadump kernel (when it is booted with more than one
> >> CPU).
> >>
> >>     c) Ease of maintainability is better (considering any new
> >> kernel features with some memory to save or stability to gain on
> >> disabling, possible platform supports) with append approach over
> >> enforcing these parameters
> >>        in the kernel.
> >>
> >>     d) It would give user the flexibility to disable unwanted
> >> kernel features in fadump kernel (numa=3Doff,
> >> cgroup_disable=3Dmemory). For every feature enabled in the
> >> production kernel, fadump kernel will have the choice to
> >>        opt out of it, provided there is such cmdline option. =20
> > Hello, =20
>=20
> Hi Michal,
>=20
> > can't the extra parameters be passed in the devicetree? =20
>=20
> Hmmm.. possible. Without change in f/w, this may not be guaranteed
> though.
>=20
> > The docs say that the kernel can tell it's a fadump crash kernel by
> > checking the devicetree ibm,dump-kernel property. Is there any
> > reason =20
>=20
> This node is exported by firmware
>=20
> > more (optional) properties cannot be added? =20
>=20
> Kernel change seems simple over f/w enhancement..

That certainly looks so when you are a kernel developer and can
implement the change yourself compared to convincing some firmware
developer that this feature makes sense.

On the other hand, the proposed kernel-only solution introduces
requirement that the maintainer does not like.

For the platform as a whole does it make more sense to add a hack to
the kernel or does it make sense to enhance the firmware to provide
more options for firmware-assisted dump?

Thanks

Michal

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-18 14:18             ` Michal Suchánek
@ 2017-04-19  4:19               ` Michael Ellerman
  2017-04-19 14:08                 ` Michal Suchánek
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Ellerman @ 2017-04-19  4:19 UTC (permalink / raw)
  To: Michal Suchánek, Hari Bathini; +Cc: Mahesh J Salgaonkar, linuxppc-dev

Michal Such=C3=A1nek <msuchanek@suse.de> writes:
> On Mon, 17 Apr 2017 20:43:02 +0530
> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>> On Friday 14 April 2017 01:28 AM, Michal Such=C3=A1nek wrote:
>> > more (optional) properties cannot be added?=20=20
>>=20
>> Kernel change seems simple over f/w enhancement..
>
> That certainly looks so when you are a kernel developer and can
> implement the change yourself compared to convincing some firmware
> developer that this feature makes sense.
>
> On the other hand, the proposed kernel-only solution introduces
> requirement that the maintainer does not like.
>
> For the platform as a whole does it make more sense to add a hack to
> the kernel or does it make sense to enhance the firmware to provide
> more options for firmware-assisted dump?

Unfortunately it doesn't really matter, because there is firmware out
there that implements the current behaviour and will never be updated.
So we have to work with what's there.

I don't think fadump_append is a hack, though it's not pretty I'll
admit.

cheers

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-19  4:19               ` Michael Ellerman
@ 2017-04-19 14:08                 ` Michal Suchánek
  2017-04-20 18:49                   ` Hari Bathini
  0 siblings, 1 reply; 19+ messages in thread
From: Michal Suchánek @ 2017-04-19 14:08 UTC (permalink / raw)
  To: linuxppc-dev

On Wed, 19 Apr 2017 14:19:47 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:

> Michal Such=C3=A1nek <msuchanek@suse.de> writes:
> > On Mon, 17 Apr 2017 20:43:02 +0530
> > Hari Bathini <hbathini@linux.vnet.ibm.com> wrote: =20
> >> On Friday 14 April 2017 01:28 AM, Michal Such=C3=A1nek wrote: =20
> >> > more (optional) properties cannot be added?   =20
> >>=20
> >> Kernel change seems simple over f/w enhancement.. =20
> >
> > That certainly looks so when you are a kernel developer and can
> > implement the change yourself compared to convincing some firmware
> > developer that this feature makes sense.
> >
> > On the other hand, the proposed kernel-only solution introduces
> > requirement that the maintainer does not like.
> >
> > For the platform as a whole does it make more sense to add a hack to
> > the kernel or does it make sense to enhance the firmware to provide
> > more options for firmware-assisted dump? =20
>=20
> Unfortunately it doesn't really matter, because there is firmware out
> there that implements the current behaviour and will never be updated.
> So we have to work with what's there.
>=20

It's not that with the existing firmware fadump does not work. It just
uses more memory than needed. So if new firmware revision allows more
flexibility it will work for people with updated firmware and people
with outdated firmware will get the current behavior.

The memory saving is only significant for big systems so only people
running those will get significant improvement from the updated
firmware.

Thanks

Michal

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-19 14:08                 ` Michal Suchánek
@ 2017-04-20 18:49                   ` Hari Bathini
  2017-04-24 10:24                     ` Michal Suchánek
  0 siblings, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2017-04-20 18:49 UTC (permalink / raw)
  To: Michal Suchánek; +Cc: linuxppc-dev, Michael Ellerman



On Wednesday 19 April 2017 07:38 PM, Michal Suchánek wrote:
> On Wed, 19 Apr 2017 14:19:47 +1000
> Michael Ellerman <mpe@ellerman.id.au> wrote:
>
>> Michal Suchánek <msuchanek@suse.de> writes:
>>> On Mon, 17 Apr 2017 20:43:02 +0530
>>> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>>>> On Friday 14 April 2017 01:28 AM, Michal Suchánek wrote:
>>>>> more (optional) properties cannot be added?
>>>> Kernel change seems simple over f/w enhancement..
>>> That certainly looks so when you are a kernel developer and can
>>> implement the change yourself compared to convincing some firmware
>>> developer that this feature makes sense.
>>>
>>> On the other hand, the proposed kernel-only solution introduces
>>> requirement that the maintainer does not like.
>>>
>>> For the platform as a whole does it make more sense to add a hack to
>>> the kernel or does it make sense to enhance the firmware to provide
>>> more options for firmware-assisted dump?
>> Unfortunately it doesn't really matter, because there is firmware out
>> there that implements the current behaviour and will never be updated.
>> So we have to work with what's there.
>>
> It's not that with the existing firmware fadump does not work. It just
> uses more memory than needed. So if new firmware revision allows more
> flexibility it will work for people with updated firmware and people
> with outdated firmware will get the current behavior.
>
> The memory saving is only significant for big systems so only people
> running those will get significant improvement from the updated
> firmware.
>


Hi Michal,

With the fadump_append= approach I posted in this response thread,
additional parameters are enforced when fadump is active. If f/w supports
appending additional parameters, it has to update chosen/bootargs
whenever fadump is active. Almost the same thing except the dirty job
is now done by f/w? Hence I thought fadump_append= kernel parameter
approach is simple and lesser of the two evils? Am i missing something 
here..

Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-20 18:49                   ` Hari Bathini
@ 2017-04-24 10:24                     ` Michal Suchánek
  2017-04-24 12:56                       ` Hari Bathini
  0 siblings, 1 reply; 19+ messages in thread
From: Michal Suchánek @ 2017-04-24 10:24 UTC (permalink / raw)
  To: Hari Bathini; +Cc: linuxppc-dev

On Fri, 21 Apr 2017 00:19:55 +0530
Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:

> On Wednesday 19 April 2017 07:38 PM, Michal Such=C3=A1nek wrote:
> > On Wed, 19 Apr 2017 14:19:47 +1000
> > Michael Ellerman <mpe@ellerman.id.au> wrote:
> > =20
> >> Michal Such=C3=A1nek <msuchanek@suse.de> writes: =20
> >>> On Mon, 17 Apr 2017 20:43:02 +0530
> >>> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote: =20
> >>>> On Friday 14 April 2017 01:28 AM, Michal Such=C3=A1nek wrote: =20
> >>>>> more (optional) properties cannot be added? =20
> >>>> Kernel change seems simple over f/w enhancement.. =20
> >>> That certainly looks so when you are a kernel developer and can
> >>> implement the change yourself compared to convincing some firmware
> >>> developer that this feature makes sense.
> >>>
> >>> On the other hand, the proposed kernel-only solution introduces
> >>> requirement that the maintainer does not like.
> >>>
> >>> For the platform as a whole does it make more sense to add a hack
> >>> to the kernel or does it make sense to enhance the firmware to
> >>> provide more options for firmware-assisted dump? =20
> >> Unfortunately it doesn't really matter, because there is firmware
> >> out there that implements the current behaviour and will never be
> >> updated. So we have to work with what's there.
> >> =20
> > It's not that with the existing firmware fadump does not work. It
> > just uses more memory than needed. So if new firmware revision
> > allows more flexibility it will work for people with updated
> > firmware and people with outdated firmware will get the current
> > behavior.
> >
> > The memory saving is only significant for big systems so only people
> > running those will get significant improvement from the updated
> > firmware.
> > =20
>=20
>=20
> Hi Michal,
>=20
> With the fadump_append=3D approach I posted in this response thread,
> additional parameters are enforced when fadump is active. If f/w
> supports appending additional parameters, it has to update
> chosen/bootargs whenever fadump is active. Almost the same thing
> except the dirty job is now done by f/w? Hence I thought
> fadump_append=3D kernel parameter approach is simple and lesser of the
> two evils? Am i missing something here..
>=20

The firmware already has to set the parameter by which the kernel can
tell it is a fadump kernel. Hence it already modifies the devicetree for
fadump and you have to rely on it to do so.

The handover area which stores the additional parameters is reserved by
the running kernel so now you also have to rely on the running kernel,
the running kernel and fadum kernel having the same idea about the
location of handover area, the crashing kernel not randomly overwriting
the handover area, etc.

In short it is additional potential for trouble which can be avoided.

I don't know if the firmware does protect the fadump reserved area and
devicetree from being randomly overwritten by the crashing kernel but
it is in the position to do so if desired. The handover area is
controlled by the crashing kernel and cannot have this guarantee.

Thanks

Michal

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-24 10:24                     ` Michal Suchánek
@ 2017-04-24 12:56                       ` Hari Bathini
  2017-04-24 14:00                         ` Michal Suchánek
  0 siblings, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2017-04-24 12:56 UTC (permalink / raw)
  To: Michal Suchánek; +Cc: linuxppc-dev

Hi Michal.

On Monday 24 April 2017 03:54 PM, Michal Suchánek wrote:
> On Fri, 21 Apr 2017 00:19:55 +0530
> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>
>> On Wednesday 19 April 2017 07:38 PM, Michal Suchánek wrote:
>>> On Wed, 19 Apr 2017 14:19:47 +1000
>>> Michael Ellerman <mpe@ellerman.id.au> wrote:
>>>   
>>>> Michal Suchánek <msuchanek@suse.de> writes:
>>>>> On Mon, 17 Apr 2017 20:43:02 +0530
>>>>> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>>>>>> On Friday 14 April 2017 01:28 AM, Michal Suchánek wrote:
>>>>>>> more (optional) properties cannot be added?
>>>>>> Kernel change seems simple over f/w enhancement..
>>>>> That certainly looks so when you are a kernel developer and can
>>>>> implement the change yourself compared to convincing some firmware
>>>>> developer that this feature makes sense.
>>>>>
>>>>> On the other hand, the proposed kernel-only solution introduces
>>>>> requirement that the maintainer does not like.
>>>>>
>>>>> For the platform as a whole does it make more sense to add a hack
>>>>> to the kernel or does it make sense to enhance the firmware to
>>>>> provide more options for firmware-assisted dump?
>>>> Unfortunately it doesn't really matter, because there is firmware
>>>> out there that implements the current behaviour and will never be
>>>> updated. So we have to work with what's there.
>>>>   
>>> It's not that with the existing firmware fadump does not work. It
>>> just uses more memory than needed. So if new firmware revision
>>> allows more flexibility it will work for people with updated
>>> firmware and people with outdated firmware will get the current
>>> behavior.
>>>
>>> The memory saving is only significant for big systems so only people
>>> running those will get significant improvement from the updated
>>> firmware.
>>>   
>>
>> Hi Michal,
>>
>> With the fadump_append= approach I posted in this response thread,
>> additional parameters are enforced when fadump is active. If f/w
>> supports appending additional parameters, it has to update
>> chosen/bootargs whenever fadump is active. Almost the same thing
>> except the dirty job is now done by f/w? Hence I thought
>> fadump_append= kernel parameter approach is simple and lesser of the
>> two evils? Am i missing something here..
>>
> The firmware already has to set the parameter by which the kernel can
> tell it is a fadump kernel. Hence it already modifies the devicetree for
> fadump and you have to rely on it to do so.

Right, devicetree is modified to include new 'ibm,kernel-dump' rtas node
informing that fadump is active.

> The handover area which stores the additional parameters is reserved by
> the running kernel so now you also have to rely on the running kernel,
> the running kernel and fadum kernel having the same idea about the
> location of handover area, the crashing kernel not randomly overwriting
> the handover area, etc.
>
> In short it is additional potential for trouble which can be avoided.
>
> I don't know if the firmware does protect the fadump reserved area and
> devicetree from being randomly overwritten by the crashing kernel but
> it is in the position to do so if desired. The handover area is
> controlled by the crashing kernel and cannot have this guarantee.
>
>

I thinks there is a mixup here..
I am no longer batting for handover area approach but
"fadump_append=" approach (suggested by Michael Ellerman in this thread)
where there is no need for a handover area but an additional kernel
parameter fadump_append=numa=off,nr_cpus=1.. which is a comma separated
list of parameters the user wants to enforce when fadump is active.

I was trying to say that passing additional parameters with 'fadump_append='
kernel parameter is better over f/w changing the chosen/bootargs node.


Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-24 12:56                       ` Hari Bathini
@ 2017-04-24 14:00                         ` Michal Suchánek
  2017-04-25 13:13                           ` Hari Bathini
  0 siblings, 1 reply; 19+ messages in thread
From: Michal Suchánek @ 2017-04-24 14:00 UTC (permalink / raw)
  To: linuxppc-dev

On Mon, 24 Apr 2017 18:26:37 +0530
Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:

> Hi Michal.
>=20
> On Monday 24 April 2017 03:54 PM, Michal Such=C3=A1nek wrote:
> > On Fri, 21 Apr 2017 00:19:55 +0530
> > Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
> > =20
> >> On Wednesday 19 April 2017 07:38 PM, Michal Such=C3=A1nek wrote: =20
> >>> On Wed, 19 Apr 2017 14:19:47 +1000
> >>> Michael Ellerman <mpe@ellerman.id.au> wrote:
> >>>    =20

> >> With the fadump_append=3D approach I posted in this response thread,
> >> additional parameters are enforced when fadump is active. If f/w
> >> supports appending additional parameters, it has to update
> >> chosen/bootargs whenever fadump is active. Almost the same thing
> >> except the dirty job is now done by f/w? Hence I thought
> >> fadump_append=3D kernel parameter approach is simple and lesser of
> >> the two evils? Am i missing something here..
> >> =20
> > The firmware already has to set the parameter by which the kernel
> > can tell it is a fadump kernel. Hence it already modifies the
> > devicetree for fadump and you have to rely on it to do so. =20
>=20
> Right, devicetree is modified to include new 'ibm,kernel-dump' rtas
> node informing that fadump is active.
>=20
> > The handover area which stores the additional parameters is
> > reserved by the running kernel so now you also have to rely on the
> > running kernel, the running kernel and fadum kernel having the same
> > idea about the location of handover area, the crashing kernel not
> > randomly overwriting the handover area, etc.
> >
> > In short it is additional potential for trouble which can be
> > avoided.
> >
> > I don't know if the firmware does protect the fadump reserved area
> > and devicetree from being randomly overwritten by the crashing
> > kernel but it is in the position to do so if desired. The handover
> > area is controlled by the crashing kernel and cannot have this
> > guarantee.
> >
> > =20
>=20
> I thinks there is a mixup here..
> I am no longer batting for handover area approach but
> "fadump_append=3D" approach (suggested by Michael Ellerman in this
> thread) where there is no need for a handover area but an additional
> kernel parameter fadump_append=3Dnuma=3Doff,nr_cpus=3D1.. which is a comma
> separated list of parameters the user wants to enforce when fadump is
> active.
>=20
> I was trying to say that passing additional parameters with
> 'fadump_append=3D' kernel parameter is better over f/w changing the
> chosen/bootargs node.


Ok, so a fadump specific parameter was just removed in
25031865553e5a2bd9b6e0a5d044952cfa2925d8 and with this we get one back.
If this is going to be added as an extra kernel parameter can't this be
passed to kdump kernel as well? Does kdump not have the same
restrictions?

Thanks

Michal

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-24 14:00                         ` Michal Suchánek
@ 2017-04-25 13:13                           ` Hari Bathini
  2017-04-25 13:29                             ` Michal Suchánek
  0 siblings, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2017-04-25 13:13 UTC (permalink / raw)
  To: Michal Suchánek; +Cc: linuxppc-dev, Michael Ellerman



On Monday 24 April 2017 07:30 PM, Michal Suchánek wrote:
> On Mon, 24 Apr 2017 18:26:37 +0530
> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>
>> Hi Michal.
>>
>> On Monday 24 April 2017 03:54 PM, Michal Suchánek wrote:
>>> On Fri, 21 Apr 2017 00:19:55 +0530
>>> Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
>>>   
>>>> On Wednesday 19 April 2017 07:38 PM, Michal Suchánek wrote:
>>>>> On Wed, 19 Apr 2017 14:19:47 +1000
>>>>> Michael Ellerman <mpe@ellerman.id.au> wrote:
>>>>>      
>>>> With the fadump_append= approach I posted in this response thread,
>>>> additional parameters are enforced when fadump is active. If f/w
>>>> supports appending additional parameters, it has to update
>>>> chosen/bootargs whenever fadump is active. Almost the same thing
>>>> except the dirty job is now done by f/w? Hence I thought
>>>> fadump_append= kernel parameter approach is simple and lesser of
>>>> the two evils? Am i missing something here..
>>>>   
>>> The firmware already has to set the parameter by which the kernel
>>> can tell it is a fadump kernel. Hence it already modifies the
>>> devicetree for fadump and you have to rely on it to do so.
>> Right, devicetree is modified to include new 'ibm,kernel-dump' rtas
>> node informing that fadump is active.
>>
>>> The handover area which stores the additional parameters is
>>> reserved by the running kernel so now you also have to rely on the
>>> running kernel, the running kernel and fadum kernel having the same
>>> idea about the location of handover area, the crashing kernel not
>>> randomly overwriting the handover area, etc.
>>>
>>> In short it is additional potential for trouble which can be
>>> avoided.
>>>
>>> I don't know if the firmware does protect the fadump reserved area
>>> and devicetree from being randomly overwritten by the crashing
>>> kernel but it is in the position to do so if desired. The handover
>>> area is controlled by the crashing kernel and cannot have this
>>> guarantee.
>>>
>>>   
>> I thinks there is a mixup here..
>> I am no longer batting for handover area approach but
>> "fadump_append=" approach (suggested by Michael Ellerman in this
>> thread) where there is no need for a handover area but an additional
>> kernel parameter fadump_append=numa=off,nr_cpus=1.. which is a comma
>> separated list of parameters the user wants to enforce when fadump is
>> active.
>>
>> I was trying to say that passing additional parameters with
>> 'fadump_append=' kernel parameter is better over f/w changing the
>> chosen/bootargs node.
>
> Ok, so a fadump specific parameter was just removed in
> 25031865553e5a2bd9b6e0a5d044952cfa2925d8 and with this we get one back.

I think you mean commit c2afb7ce9d088696427018ba2135b898058507b8
from linux-next. If so, yes.

> If this is going to be added as an extra kernel parameter can't this be
> passed to kdump kernel as well? Does kdump not have the same
> restrictions?
>

kdump kernel is loaded with kexec and additional parameters can be 
passed to it at the
time of loading. But fadump kernel boots like a regular kernel (through 
f/w & bootloader,
resetting all the h/w) except that f/w guarantees to keep the memory 
intact. So, there is
a need for a different approach to pass additional parameters in case of 
fadump..

Thanks
Hari

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel
  2017-04-25 13:13                           ` Hari Bathini
@ 2017-04-25 13:29                             ` Michal Suchánek
  0 siblings, 0 replies; 19+ messages in thread
From: Michal Suchánek @ 2017-04-25 13:29 UTC (permalink / raw)
  To: Hari Bathini; +Cc: linuxppc-dev

On Tue, 25 Apr 2017 18:43:11 +0530
Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:

> On Monday 24 April 2017 07:30 PM, Michal Such=C3=A1nek wrote:
> > On Mon, 24 Apr 2017 18:26:37 +0530
> > Hari Bathini <hbathini@linux.vnet.ibm.com> wrote:
> > =20
> >> Hi Michal.
> >>

> >>>    =20
> >> I thinks there is a mixup here..
> >> I am no longer batting for handover area approach but
> >> "fadump_append=3D" approach (suggested by Michael Ellerman in this
> >> thread) where there is no need for a handover area but an
> >> additional kernel parameter fadump_append=3Dnuma=3Doff,nr_cpus=3D1..
> >> which is a comma separated list of parameters the user wants to
> >> enforce when fadump is active.
> >>
> >> I was trying to say that passing additional parameters with
> >> 'fadump_append=3D' kernel parameter is better over f/w changing the
> >> chosen/bootargs node. =20
> >
> > Ok, so a fadump specific parameter was just removed in
> > 25031865553e5a2bd9b6e0a5d044952cfa2925d8 and with this we get one
> > back. =20
>=20
> I think you mean commit c2afb7ce9d088696427018ba2135b898058507b8
> from linux-next. If so, yes.
>=20
> > If this is going to be added as an extra kernel parameter can't
> > this be passed to kdump kernel as well? Does kdump not have the same
> > restrictions?
> > =20
>=20
> kdump kernel is loaded with kexec and additional parameters can be=20
> passed to it at the
> time of loading. But fadump kernel boots like a regular kernel
> (through f/w & bootloader,
> resetting all the h/w) except that f/w guarantees to keep the memory=20
> intact. So, there is
> a need for a different approach to pass additional parameters in case
> of fadump..
>=20

I see, you can pass different commandline to the kdump kernel while
loading it with kexec but fadump kernel is booted same as normal kernel
until it looks at the devicetree so the extra arguments have to be
passed always and only appended to the commandline in the fadump case.

This sounds reasonable.

Thanks

Michal

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2017-04-25 13:29 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-07 12:38 [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Hari Bathini
2017-02-07 12:38 ` [PATCH v2 2/2] fadump: update documentation about introduction of handover area Hari Bathini
2017-04-07  1:54 ` [PATCH v2 1/2] fadump: reduce memory consumption for capture kernel Michael Ellerman
2017-04-07  7:24   ` Hari Bathini
2017-04-07  9:06     ` Hari Bathini
2017-04-07 13:46     ` Michael Ellerman
2017-04-07 17:41       ` Hari Bathini
2017-04-12 20:29       ` Hari Bathini
2017-04-13 19:58         ` Michal Suchánek
2017-04-17 15:13           ` Hari Bathini
2017-04-18 14:18             ` Michal Suchánek
2017-04-19  4:19               ` Michael Ellerman
2017-04-19 14:08                 ` Michal Suchánek
2017-04-20 18:49                   ` Hari Bathini
2017-04-24 10:24                     ` Michal Suchánek
2017-04-24 12:56                       ` Hari Bathini
2017-04-24 14:00                         ` Michal Suchánek
2017-04-25 13:13                           ` Hari Bathini
2017-04-25 13:29                             ` Michal Suchánek

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.