All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hari Bathini <hbathini@linux.ibm.com>
To: Ananth N Mavinakayanahalli <ananth@linux.ibm.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Mahesh J Salgaonkar <mahesh@linux.ibm.com>,
	Vasant Hegde <hegdevasant@linux.ibm.com>,
	linuxppc-dev <linuxppc-dev@ozlabs.org>,
	Stewart Smith <stewart@linux.ibm.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Subject: [RFC PATCH 3/5] powerpc/fadump: enable fadump support on powernv platform
Date: Tue, 15 May 2018 10:29:15 +0530	[thread overview]
Message-ID: <152636035587.17123.1844308737319409343.stgit@hbathini.in.ibm.com> (raw)
In-Reply-To: <152636029761.17123.10365462779196202939.stgit@hbathini.in.ibm.com>

From: Hari Bathini <hbathini@linux.vnet.ibm.com>

Firmware-assisted dump support is enabled for POWERNV platform in P9
firmware. Make the corresponding updates in kernel to enable fadump
support on POWERNV platform.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                            |    2 
 arch/powerpc/include/asm/opal-api.h             |    4 
 arch/powerpc/include/asm/opal.h                 |    1 
 arch/powerpc/kernel/fadump.c                    |  397 ++++++++++++++++++++---
 arch/powerpc/kernel/fadump_internal.h           |   27 ++
 arch/powerpc/platforms/powernv/Makefile         |    1 
 arch/powerpc/platforms/powernv/opal-wrappers.S  |    1 
 arch/powerpc/platforms/powernv/powernv_fadump.c |  337 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/powernv_fadump.h |   63 ++++
 arch/powerpc/platforms/pseries/pseries_fadump.c |    8 
 10 files changed, 783 insertions(+), 58 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.c
 create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181..d749f1f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -599,7 +599,7 @@ config CRASH_DUMP
 
 config FA_DUMP
 	bool "Firmware-assisted dump"
-	depends on PPC64 && PPC_RTAS
+	depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
 	select CRASH_CORE
 	select CRASH_DUMP
 	help
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index d886a5b..75e8925 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -206,7 +206,8 @@
 #define OPAL_NPU_TL_SET				161
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
-#define OPAL_LAST				165
+#define OPAL_CONFIGURE_FADUMP			167
+#define OPAL_LAST				167
 
 /* Device tree flags */
 
@@ -1040,6 +1041,7 @@ enum OpalSysCooling {
 enum {
 	OPAL_REBOOT_NORMAL		= 0,
 	OPAL_REBOOT_PLATFORM_ERROR	= 1,
+	OPAL_REBOOT_MPIPL		= 3,
 };
 
 /* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 03e1a92..4c1f483 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -43,6 +43,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
 				uint64_t PE_handle);
 int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
 			uint64_t rate_phys, uint32_t size);
+int64_t opal_configure_fadump(uint64_t command, void *data, uint64_t data_size);
 int64_t opal_console_write(int64_t term_number, __be64 *length,
 			   const uint8_t *buffer);
 int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 88fafe1..a27e4af 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -53,8 +53,12 @@ static struct cma *fadump_cma;
 struct fadump_ops_t *fadump_ops;
 
 static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+struct fadump_memory_range crash_memory_ranges[INIT_CRASHMEM_RANGES];
 int crash_mem_ranges;
+struct fadump_memory_range reserved_ranges[INIT_MEMBLOCK_REGIONS];
+int reserved_ranges_cnt;
+struct fadump_memory_range memory_ranges[2 * INIT_MEMBLOCK_REGIONS];
+int memory_ranges_cnt;
 
 #ifdef CONFIG_CMA
 /*
@@ -120,6 +124,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
 	if (strcmp(uname, "rtas") == 0)
 		return pseries_dt_scan_fadump(&fw_dump, node);
 
+	if (strcmp(uname, "ibm,dump") == 0)
+		return powernv_dt_scan_fadump(&fw_dump, node);
+
 	return 0;
 }
 
@@ -156,6 +163,8 @@ int is_fadump_active(void)
 /* Print firmware assisted dump configurations for debugging purpose. */
 static void fadump_show_config(void)
 {
+	int i;
+
 	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
 			(fw_dump.fadump_supported ? "present" : "no support"));
 
@@ -170,6 +179,13 @@ static void fadump_show_config(void)
 	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
 	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
 	pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
+	pr_debug("Real memory region hole size  : %lx\n",
+		 fw_dump.boot_memory_hole_size);
+	pr_debug("Real meory regions count : %lx\n", fw_dump.rmr_regions_cnt);
+	for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+		pr_debug("%d. RMR base = %lx, size = %lx\n", (i+1),
+			 fw_dump.rmr_src_addr[i], fw_dump.rmr_src_size[i]);
+	}
 }
 
 /**
@@ -243,6 +259,157 @@ static inline unsigned long fadump_calculate_reserve_size(void)
 	return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
 }
 
+static void __init fadump_get_reserved_ranges(void)
+{
+	unsigned long i, j, dt_root;
+	unsigned long long base, size;
+	struct fadump_memory_range tmp_range;
+	const __be32 *prop;
+	int len, idx;
+
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
+
+	dt_root = of_get_flat_dt_root();
+
+	prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+
+	if (!prop)
+		return;
+
+	/*
+	 * Each reserved range is an (address,size) pair, 2 cells each,
+	 * totalling 4 cells per range.
+	 */
+	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+		u64 base, size;
+
+		base = of_read_number(prop + (i * 4) + 0, 2);
+		size = of_read_number(prop + (i * 4) + 2, 2);
+		if (!size)
+			continue;
+
+		reserved_ranges[reserved_ranges_cnt].base = base;
+		reserved_ranges[reserved_ranges_cnt].size = size;
+		reserved_ranges_cnt++;
+
+	}
+
+	if (!reserved_ranges_cnt)
+		return;
+
+	/* Sort the reserved ranges */
+	for (i = 0; i < reserved_ranges_cnt; i++) {
+		idx = i;
+		for (j = i + 1; j < reserved_ranges_cnt; j++) {
+			if (reserved_ranges[idx].base > reserved_ranges[j].base)
+				idx = j;
+		}
+		if (idx != i) {
+			tmp_range = reserved_ranges[idx];
+			reserved_ranges[idx] = reserved_ranges[i];
+			reserved_ranges[i] = tmp_range;
+		}
+	}
+
+	/* Merge adjacent reserved ranges */
+	idx = 0;
+	for (i = 1; i < reserved_ranges_cnt; i++) {
+		base = reserved_ranges[i-1].base;
+		size = reserved_ranges[i-1].size;
+		if (reserved_ranges[i].base == (base + size))
+			reserved_ranges[idx].size += reserved_ranges[i].size;
+		else {
+			idx++;
+			if (i == idx)
+				continue;
+
+			reserved_ranges[idx] = reserved_ranges[i];
+		}
+	}
+	reserved_ranges_cnt = idx + 1;
+}
+
+static inline void fadump_add_memory_range(unsigned long long base,
+					   unsigned long long size)
+{
+	if (!size)
+		return;
+
+	pr_debug("memory_range[%d] [%#016llx-%#016llx), %#llx bytes\n",
+		 memory_ranges_cnt, base, base + size, size);
+	memory_ranges[memory_ranges_cnt].base = base;
+	memory_ranges[memory_ranges_cnt].size = size;
+	memory_ranges_cnt++;
+}
+
+static void fadump_setup_memory_ranges(void)
+{
+	unsigned long i, j;
+	unsigned long long base, end, size;
+	struct memblock_region *reg;
+	struct fadump_memory_range tmp_ranges[INIT_MEMBLOCK_REGIONS];
+	int tmp_ranges_cnt;
+
+	/* get memory ranges */
+	tmp_ranges_cnt = 0;
+	for_each_memblock(memory, reg) {
+		tmp_ranges[tmp_ranges_cnt].base = (unsigned long long)reg->base;
+		tmp_ranges[tmp_ranges_cnt].size = (unsigned long long)reg->size;
+		tmp_ranges_cnt++;
+	}
+
+	/* exclude reserved ranges */
+	memory_ranges_cnt = 0;
+	for (i = 0; i < tmp_ranges_cnt; i++) {
+		unsigned long long mem_base, mem_end, rsrv_base, rsrv_end;
+		int add = 1;
+
+		base = mem_base = tmp_ranges[i].base;
+		end = mem_end = base + tmp_ranges[i].size;
+		for (j = 0; j < reserved_ranges_cnt; j++) {
+			rsrv_base = reserved_ranges[j].base;
+			rsrv_end  = rsrv_base + reserved_ranges[j].size;
+
+			if (mem_base > rsrv_end)
+				continue;
+
+			if ((j < (reserved_ranges_cnt - 1)) &&
+			    (reserved_ranges[j + 1].base < mem_end))
+				mem_end = reserved_ranges[j + 1].base;
+
+			if ((rsrv_base < mem_end) && (rsrv_end > mem_base)) {
+				if ((mem_base < rsrv_base) &&
+				    (mem_end > rsrv_end)) {
+					size = rsrv_base - mem_base;
+					fadump_add_memory_range(mem_base, size);
+					size = mem_end - rsrv_end;
+					fadump_add_memory_range(rsrv_end, size);
+				} else if (mem_base < rsrv_base) {
+					size = rsrv_base - mem_base;
+					fadump_add_memory_range(mem_base, size);
+				} else if (mem_end > rsrv_end) {
+					size = mem_end - rsrv_end;
+					fadump_add_memory_range(rsrv_end, size);
+				}
+
+				add = 0;
+			}
+
+			if (mem_end == end)
+				break;
+
+			mem_base = mem_end;
+			mem_end = end;
+			add = 1;
+		}
+
+		if (add)
+			fadump_add_memory_range(mem_base, mem_end - mem_base);
+	}
+}
+
+
 /*
  * Calculate the total memory size required to be reserved for
  * firmware-assisted dump registration.
@@ -264,24 +431,113 @@ static unsigned long get_fadump_area_size(void)
 	return size;
 }
 
-static void __init fadump_reserve_crash_area(unsigned long base,
-					     unsigned long size)
+/* Preserve (memblock_reserve) every memory_ranges[] span at or above @base */
+static void __init fadump_reserve_crash_area(unsigned long base)
 {
-	struct memblock_region *reg;
-	unsigned long mstart, mend, msize;
+	int i;
+	unsigned long mstart, msize;
 
-	for_each_memblock(memory, reg) {
-		mstart = max_t(unsigned long, base, reg->base);
-		mend = reg->base + reg->size;
-		mend = min(base + size, mend);
-
-		if (mstart < mend) {
-			msize = mend - mstart;
-			memblock_reserve(mstart, msize);
-			pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
-				(msize >> 20), mstart);
+	for (i = 0; i < memory_ranges_cnt; i++) {
+		mstart = memory_ranges[i].base;
+		msize = memory_ranges[i].size;
+		if ((mstart + msize) <= base)
+			continue;
+		/* Range straddles @base: trim off the part that lies below it */
+		if (mstart < base) {
+			msize -= (base - mstart);
+			mstart = base;
 		}
+		pr_info("Reserving %luMB of memory at %#016lx for saving crash dump\n",
+			(msize >> 20), mstart);
+		memblock_reserve(mstart, msize);
+	}
+}
+
+static int __init add_rmr_region(unsigned long rmr_start,
+				 unsigned long rmr_size)
+{
+	int i = fw_dump.rmr_regions_cnt;
+
+	if (i >= MAX_REAL_MEM_REGIONS)	/* table full: leave the count as is */
+		return 0;
+	pr_debug("Added real memory range[%d] [%#016lx-%#016lx)\n",
+		 i, rmr_start, (rmr_start + rmr_size));
+	fw_dump.rmr_src_addr[i] = rmr_start;
+	fw_dump.rmr_src_size[i] = rmr_size;
+	fw_dump.rmr_regions_cnt++;	/* count only entries actually stored */
+	return 1;
+}
+
+/*
+ * Platforms like PowerNV have an upper limit on the size.
+ * If 'rmr_size' is bigger than that limit, split this memory range
+ * into multiple entries.
+ */
+static int __init add_rmr_regions(unsigned long rmr_start,
+				  unsigned long rmr_size)
+{
+	unsigned long rstart, rsize, max_size;
+	int ret = 1;
+
+	rstart = rmr_start;
+	max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : rmr_size;
+	while (rmr_size) {
+		if (rmr_size > max_size)
+			rsize = max_size;
+		else
+			rsize = rmr_size;
+
+		ret = add_rmr_region(rstart, rsize);
+		if (!ret)
+			break;
+
+		rmr_size -= rsize;
+		rstart += rsize;
 	}
+
+	return ret;
+}
+
+static int __init fadump_get_rmr_regions(void)
+{
+	int i, ret = 1;
+	unsigned long base, size, last_end;
+	unsigned long mem_size = fw_dump.boot_memory_size;
+
+	fw_dump.rmr_regions_cnt = 0;
+	fw_dump.boot_memory_hole_size = 0;
+
+	/*
+	 * TODO: Extend support for multiple real memory regions on
+	 *       pseries platform too.
+	 */
+	if (fw_dump.fadump_platform == FADUMP_PLATFORM_PSERIES) {
+		ret = add_rmr_regions(RMA_START, fw_dump.boot_memory_size);
+		return ret;
+	}
+
+	last_end = memory_ranges[0].base;
+	for (i = 0; i < memory_ranges_cnt; i++) {
+		base = memory_ranges[i].base;
+		size = memory_ranges[i].size;
+
+		if (base > last_end)
+			fw_dump.boot_memory_hole_size += (base - last_end);
+
+		if (size >= mem_size) {
+			ret = add_rmr_regions(base, mem_size);
+			break;
+		}
+
+		mem_size -= size;
+		ret = add_rmr_regions(base, size);
+		if (!ret)
+			break;
+
+		last_end = base + size;
+	}
+
+	return ret;
 }
 
 int __init fadump_reserve_mem(void)
@@ -297,6 +553,10 @@ int __init fadump_reserve_mem(void)
 		fw_dump.fadump_enabled = 0;
 		return 0;
 	}
+
+	fadump_get_reserved_ranges();
+	fadump_setup_memory_ranges();
+
 	/*
 	 * Initialize boot memory size
 	 * If dump is active then we have already calculated the size during
@@ -311,6 +571,11 @@ int __init fadump_reserve_mem(void)
 						 FADUMP_CMA_ALIGNMENT);
 #endif
 		fw_dump.rmr_source_len = fw_dump.boot_memory_size;
+		if (!fadump_get_rmr_regions()) {
+			fw_dump.fadump_enabled = 0;
+			pr_err("Too many holes in boot memory area to enable fadump\n");
+			return 0;
+		}
 	}
 
 	size = get_fadump_area_size();
@@ -335,6 +600,7 @@ int __init fadump_reserve_mem(void)
 	else
 		memory_boundary = memblock_end_of_DRAM();
 
+	base = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
 	if (fw_dump.dump_active) {
 #ifdef CONFIG_HUGETLB_PAGE
 		/*
@@ -348,11 +614,9 @@ int __init fadump_reserve_mem(void)
 		 * If last boot has crashed then reserve all the memory
 		 * above boot_memory_size so that we don't touch it until
 		 * dump is written to disk by userspace tool. This memory
-		 * will be released for general use once the dump is saved.
+		 * can be released for general use by invalidating fadump.
 		 */
-		base = fw_dump.boot_memory_size;
-		size = memory_boundary - base;
-		fadump_reserve_crash_area(base, size);
+		fadump_reserve_crash_area(base);
 
 		fw_dump.fadumphdr_addr =
 			fadump_ops->get_meta_area_start(&fw_dump);
@@ -366,23 +630,22 @@ int __init fadump_reserve_mem(void)
 		 * use memblock_find_in_range() here since it doesn't allocate
 		 * from bottom to top.
 		 */
-		for (base = fw_dump.boot_memory_size;
-		     base <= (memory_boundary - size);
-		     base += size) {
+		while (base <= (memory_boundary - size)) {
 			if (memblock_is_region_memory(base, size) &&
 			    !memblock_is_region_reserved(base, size))
 				break;
+
+			base += size;
 		}
+
 		if ((base > (memory_boundary - size)) ||
 		    memblock_reserve(base, size)) {
 			pr_err("Failed to reserve memory\n");
 			return 0;
 		}
 
-		pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
-			"assisted dump (System RAM: %ldMB)\n",
-			(unsigned long)(size >> 20),
-			(unsigned long)(base >> 20),
+		pr_info("Reserved %ldMB of memory at %#016lx (System RAM: %ldMB)\n",
+			(unsigned long)(size >> 20), base,
 			(unsigned long)(memblock_phys_mem_size() >> 20));
 
 		fw_dump.reserve_dump_area_start = base;
@@ -543,23 +806,28 @@ static int fadump_init_elfcore_header(char *bufp)
  */
 static void fadump_setup_crash_memory_ranges(void)
 {
-	struct memblock_region *reg;
-	unsigned long long start, end;
+	unsigned long long start, end, offset;
+	int i;
 
 	pr_debug("Setup crash memory ranges.\n");
 	crash_mem_ranges = 0;
+	offset = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
+
 	/*
-	 * add the first memory chunk (RMA_START through boot_memory_size) as
-	 * a separate memory chunk. The reason is, at the time crash firmware
-	 * will move the content of this memory chunk to different location
-	 * specified during fadump registration. We need to create a separate
-	 * program header for this chunk with the correct offset.
+	 * Add real memory region(s) whose content is going to be moved to
+	 * a different location, specified during fadump registration, by
+	 * firmware at the time of crash. We need to create separate program
+	 * header(s) for this memory chunk with the correct offset.
 	 */
-	fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+	for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+		start = fw_dump.rmr_src_addr[i];
+		end = start + fw_dump.rmr_src_size[i];
+		fadump_add_crash_memory(start, end);
+	}
 
-	for_each_memblock(memory, reg) {
-		start = (unsigned long long)reg->base;
-		end = start + (unsigned long long)reg->size;
+	for (i = 0; i < memory_ranges_cnt; i++) {
+		start = memory_ranges[i].base;
+		end = start + memory_ranges[i].size;
 
 		/*
 		 * skip the first memory chunk that is already added (RMA_START
@@ -567,9 +835,9 @@ static void fadump_setup_crash_memory_ranges(void)
 		 * when RMA_START changes to a non-zero value.
 		 */
 		BUILD_BUG_ON(RMA_START != 0);
-		if (start < fw_dump.boot_memory_size) {
-			if (end > fw_dump.boot_memory_size)
-				start = fw_dump.boot_memory_size;
+		if (start < offset) {
+			if (end > offset)
+				start = offset;
 			else
 				continue;
 		}
@@ -586,17 +854,32 @@ static void fadump_setup_crash_memory_ranges(void)
  */
 static inline unsigned long fadump_relocate(unsigned long paddr)
 {
-	if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
-		return fw_dump.rmr_destination_addr + paddr;
-	else
-		return paddr;
+	unsigned long raddr, rstart, rend, offset;
+	int i;
+
+	offset = 0;	/* total size of the regions preceding region i */
+	raddr = paddr;	/* addresses outside every region are returned as is */
+	for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+		rstart = fw_dump.rmr_src_addr[i];
+		rend = rstart + fw_dump.rmr_src_size[i];
+		/* Region i is copied to destination + offset; keep paddr's delta */
+		if (paddr > rstart && paddr < rend) {
+			raddr += fw_dump.rmr_destination_addr + offset - rstart;
+			break;
+		}
+
+		offset += fw_dump.rmr_src_size[i];
+	}
+
+	return raddr;
 }
 
 static int fadump_create_elfcore_headers(char *bufp)
 {
 	struct elfhdr *elf;
 	struct elf_phdr *phdr;
-	int i;
+	unsigned long long raddr, offset;
+	int i, j;
 
 	fadump_init_elfcore_header(bufp);
 	elf = (struct elfhdr *)bufp;
@@ -639,9 +922,12 @@ static int fadump_create_elfcore_headers(char *bufp)
 	(elf->e_phnum)++;
 
 	/* setup PT_LOAD sections. */
-
+	j = 0;
+	offset = 0;
+	raddr = fw_dump.rmr_src_addr[0];
 	for (i = 0; i < crash_mem_ranges; i++) {
 		unsigned long long mbase, msize;
+
 		mbase = crash_memory_ranges[i].base;
 		msize = crash_memory_ranges[i].size;
 
@@ -654,13 +940,17 @@ static int fadump_create_elfcore_headers(char *bufp)
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= mbase;
 
-		if (mbase == RMA_START) {
+		if (mbase == raddr) {
 			/*
 			 * The entire RMA region will be moved by firmware
 			 * to the specified destination_address. Hence set
 			 * the correct offset.
 			 */
-			phdr->p_offset = fw_dump.rmr_destination_addr;
+			phdr->p_offset = fw_dump.rmr_destination_addr + offset;
+			if (j < (fw_dump.rmr_regions_cnt - 1)) {
+				offset += fw_dump.rmr_src_size[j];
+				raddr = fw_dump.rmr_src_addr[++j];
+			}
 		}
 
 		phdr->p_paddr = mbase;
@@ -707,6 +997,7 @@ static int register_fadump(void)
 	if (!fw_dump.reserve_dump_area_size)
 		return -ENODEV;
 
+	fadump_setup_memory_ranges();
 	fadump_setup_crash_memory_ranges();
 
 	addr = fadump_ops->get_meta_area_start(&fw_dump);
@@ -822,14 +1113,14 @@ static void fadump_invalidate_release_mem(void)
 	 * later for releasing the memory for general use.
 	 */
 	reserved_area_start = fw_dump.reserve_dump_area_start;
-	reserved_area_end = reserved_area_start +
-			fw_dump.reserve_dump_area_size;
+	reserved_area_end =
+		memory_limit ? memory_limit : memblock_end_of_DRAM();
+
 	/*
-	 * Setup reserve_dump_area_start and its size so that we can
-	 * reuse this reserved memory for Re-registration.
+	 * Setup reserve_dump_area_start so that we can reuse this
+	 * reserved memory for Re-registration.
 	 */
 	fw_dump.reserve_dump_area_start = destination_address;
-	fw_dump.reserve_dump_area_size = get_fadump_area_size();
 
 	fadump_release_memory(reserved_area_start, reserved_area_end);
 	if (fw_dump.cpu_notes_buf) {
diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h
index 3791da7..eae4b55 100644
--- a/arch/powerpc/kernel/fadump_internal.h
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -49,6 +49,7 @@
 
 /* Firmware-Assited Dump platforms */
 #define FADUMP_PLATFORM_PSERIES		1
+#define FADUMP_PLATFORM_POWERNV		2
 
 #define FADUMP_CPU_ID_MASK		((1UL << 32) - 1)
 
@@ -92,11 +93,14 @@ struct fadump_crash_info_header {
 /* Crash memory ranges */
 #define INIT_CRASHMEM_RANGES	(INIT_MEMBLOCK_REGIONS + 2)
 
-struct fad_crash_memory_ranges {
+struct fadump_memory_range {
 	unsigned long long	base;
 	unsigned long long	size;
 };
 
+/* Maximum no. of real memory regions supported by the kernel */
+#define MAX_REAL_MEM_REGIONS		6
+
 /* Firmware-assisted dump configuration details. */
 struct fw_dump {
 	unsigned long	cpu_state_data_size;
@@ -114,6 +118,17 @@ struct fw_dump {
 	unsigned long	rmr_source_len;
 	unsigned long	rmr_destination_addr;
 
+	unsigned long	boot_memory_hole_size;
+	unsigned long	rmr_regions_cnt;
+	unsigned long	rmr_src_addr[MAX_REAL_MEM_REGIONS];
+	unsigned long	rmr_src_size[MAX_REAL_MEM_REGIONS];
+
+	/*
+	 * Maximum size supported by firmware to copy from source to
+	 * destination address per entry.
+	 */
+	unsigned long	max_copy_size;
+
 	int		ibm_configure_kernel_dump;
 
 	unsigned long	fadump_enabled:1;
@@ -157,4 +172,14 @@ pseries_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
 }
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+extern int powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node);
+#else
+static inline int
+powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
+{
+	return 1;
+}
+#endif
+
 #endif /* __PPC64_FA_DUMP_INTERNAL_H__ */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 703a350..0d106b5 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,6 +6,7 @@ obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y			+= opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP)	+= powernv_fadump.o
 obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3da30c2..20bbb9c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -325,3 +325,4 @@ OPAL_CALL(opal_npu_spa_clear_cache,		OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,			OPAL_NPU_TL_SET);
 OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,		OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
 OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,		OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_configure_fadump,		OPAL_CONFIGURE_FADUMP);
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c b/arch/powerpc/platforms/powernv/powernv_fadump.c
new file mode 100644
index 0000000..6d4b515
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.c
@@ -0,0 +1,337 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "powernv fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/bug.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/opal.h>
+#include <asm/fadump.h>
+
+#include "../../kernel/fadump_internal.h"
+#include "powernv_fadump.h"
+
+static struct powernv_fadump_mem_struct fdm;
+static const struct powernv_fadump_mem_struct *fdm_active;
+unsigned long fdm_actual_size;
+
+static void update_fadump_config(struct fw_dump *fadump_conf,
+				 const struct powernv_fadump_mem_struct *fdm)
+{
+	unsigned long base, size, last_end;
+	int section_cnt = be16_to_cpu(fdm->section_count);
+	int unused_sections  = (POWERNV_MAX_SECTIONS - section_cnt);
+	int i, j;
+
+	pr_debug("section_cnt: %d\n", section_cnt);
+	WARN_ON(unused_sections < 0);
+	fdm_actual_size = sizeof(*fdm) -
+		(unused_sections * sizeof(struct powernv_fadump_section));
+
+	/*
+	 * The first real memory region entry is the real memory
+	 * regions destination address.
+	 */
+	fadump_conf->rmr_destination_addr = 0;
+	for (i = 0; i < section_cnt; i++) {
+		if (fdm->section[i].src_type ==
+		    POWERNV_FADUMP_REAL_MODE_REGION) {
+			fadump_conf->rmr_destination_addr =
+				be64_to_cpu(fdm->section[i].dest_addr);
+			break;
+		}
+	}
+	pr_debug("Destination address of real memory regions: %#016lx\n",
+		 fadump_conf->rmr_destination_addr);
+
+	if (fadump_conf->dump_active) {
+		j = 0;
+		last_end = 0;
+		fadump_conf->rmr_source_len = 0;
+		fadump_conf->boot_memory_hole_size = 0;
+		for (i = 0; i < section_cnt; i++) {
+			if (fdm->section[i].src_type ==
+			    POWERNV_FADUMP_REAL_MODE_REGION) {
+				base = be64_to_cpu(fdm->section[i].src_addr);
+				size = be64_to_cpu(fdm->section[i].src_size);
+				pr_debug("%d. RMR base: 0x%lx, size: 0x%lx\n",
+					 (i + 1), base, size);
+
+				fadump_conf->rmr_src_addr[j] = base;
+				fadump_conf->rmr_src_size[j] = size;
+				fadump_conf->rmr_source_len += size;
+
+				if (base > last_end) {
+					fadump_conf->boot_memory_hole_size +=
+						(base - last_end);
+				}
+
+				last_end = base + size;
+				j++;
+			}
+		}
+		fadump_conf->rmr_regions_cnt = j;
+		pr_debug("Real memory regions count: %lu\n",
+			 fadump_conf->rmr_regions_cnt);
+	}
+}
+
+static ulong powernv_init_fadump_mem_struct(struct fw_dump *fadump_conf,
+					    ulong addr)
+{
+	int i, section_cnt = 0;
+
+	fdm.section_size = cpu_to_be16(sizeof(struct powernv_fadump_section));
+
+	/* RMA region sections */
+	for (i = 0; i < fadump_conf->rmr_regions_cnt; i++) {
+		fdm.section[RMR_REGION_INPUT_IDX + i].src_type  =
+			POWERNV_FADUMP_REAL_MODE_REGION;
+		fdm.section[RMR_REGION_INPUT_IDX + i].src_addr  =
+			cpu_to_be64(fadump_conf->rmr_src_addr[i]);
+		fdm.section[RMR_REGION_INPUT_IDX + i].dest_addr =
+			cpu_to_be64(addr);
+		fdm.section[RMR_REGION_INPUT_IDX + i].src_size  =
+			fdm.section[RMR_REGION_INPUT_IDX + i].dest_size =
+			cpu_to_be64(fadump_conf->rmr_src_size[i]);
+
+		section_cnt++;
+		addr += fadump_conf->rmr_src_size[i];
+	}
+
+	fdm.section_count = cpu_to_be16(section_cnt);
+	update_fadump_config(fadump_conf, &fdm);
+
+	return addr;
+}
+
+static int powernv_register_fadump(struct fw_dump *fadump_conf)
+{
+	int rc, err = -EIO;	/* -EIO unless a case below overrides it */
+
+	rc = opal_configure_fadump(FADUMP_REGISTER, &fdm, fdm_actual_size);
+	switch (rc) {
+	default:
+		pr_err("Failed to register. Unknown Error(%d).\n", rc);
+		break;
+	case OPAL_UNSUPPORTED:
+		pr_err("Support not available.\n");
+		fadump_conf->fadump_supported = 0;
+		fadump_conf->fadump_enabled = 0;
+		break;
+	case OPAL_INTERNAL_ERROR:
+		pr_err("Failed to register. Hardware Error(%d).\n", rc);
+		break;
+	case OPAL_PARAMETER:
+		pr_err("Failed to register. Parameter Error(%d).\n", rc);
+		break;
+	case OPAL_PERMISSION:
+		pr_err("Already registered!\n");
+		fadump_conf->dump_registered = 1;
+		err = -EEXIST;
+		break;
+	case OPAL_SUCCESS:
+		pr_info("Registration is successful!\n");	/* not an error */
+		fadump_conf->dump_registered = 1;
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static int powernv_unregister_fadump(struct fw_dump *fadump_conf)
+{
+	int rc;
+
+	rc = opal_configure_fadump(FADUMP_UNREGISTER, &fdm, fdm_actual_size);
+	if (rc) {
+		pr_err("Failed to un-register - unexpected Error(%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_registered = 0;
+	return 0;
+}
+
+static ulong powernv_get_preserv_area_start(struct fw_dump *fadump_conf)
+{
+	return fadump_conf->rmr_destination_addr;
+}
+
+static ulong powernv_get_meta_area_start(struct fw_dump *fadump_conf)
+{
+	return (fadump_conf->rmr_destination_addr +
+		fadump_conf->rmr_source_len);
+}
+
+static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
+{
+	int rc;
+
+	rc = opal_configure_fadump(FADUMP_INVALIDATE, (void *)fdm_active,
+				   fdm_actual_size);
+	if (rc) {
+		pr_err("Failed to invalidate - unexpected Error(%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_active = 0;
+	fdm_active = NULL;
+	return 0;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+	u32 num_cpus = 1, *note_buf;
+	struct fadump_crash_info_header *fdh = NULL;
+
+	/* Allocate buffer to hold cpu crash notes. */
+	fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+	fadump_conf->cpu_notes_buf_size =
+		PAGE_ALIGN(fadump_conf->cpu_notes_buf_size);
+	note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size);
+	if (!note_buf) {
+		pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n",
+		       fadump_conf->cpu_notes_buf_size);
+		return -ENOMEM;
+	}
+	fadump_conf->cpu_notes_buf = __pa(note_buf);
+
+	pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+		 (num_cpus * sizeof(note_buf_t)), note_buf);
+
+	if (fadump_conf->fadumphdr_addr)
+		fdh = __va(fadump_conf->fadumphdr_addr);
+
+	if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) {
+		note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+		final_note(note_buf);
+
+		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+			 fdh->elfcorehdr_addr);
+		fadump_update_elfcore_header(fadump_conf,
+					     __va(fdh->elfcorehdr_addr));
+	}
+
+	return 0;
+}
+
+static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
+{
+	struct fadump_crash_info_header *fdh;
+	int rc = 0;
+
+	if (!fdm_active || !fadump_conf->fadumphdr_addr)
+		return -EINVAL;
+
+	/* Validate the fadump crash info header */
+	fdh = __va(fadump_conf->fadumphdr_addr);
+	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * TODO: To build cpu notes, find a way to map PIR to logical id.
+	 *       Also, we may need different method for pseries and powernv.
+	 *       The currently booted kernel could have a different PIR to
+	 *       logical id mapping. So, try saving info of previous kernel's
+	 *       paca to get the right PIR to logical id mapping.
+	 */
+	rc = fadump_build_cpu_notes(fadump_conf);
+	if (rc)
+		return rc;
+
+	/*
+	 * We are done validating dump info and elfcore header is now ready
+	 * to be exported. set elfcorehdr_addr so that vmcore module will
+	 * export the elfcore header through '/proc/vmcore'.
+	 */
+	elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+	return rc;
+}
+
+static void powernv_fadump_region_show(struct fw_dump *fadump_conf,
+				       struct seq_file *m)
+{
+}
+
+static void powernv_crash_fadump(const char *msg)
+{
+	int rc;
+
+	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+	if (rc == OPAL_UNSUPPORTED)
+		pr_emerg("Reboot type %d not supported\n", OPAL_REBOOT_MPIPL);
+	else if (rc == OPAL_HARDWARE)
+		pr_emerg("No backend support for MPIPL!\n");
+}
+
+static struct fadump_ops_t powernv_fadump_ops = {
+	.init_fadump_mem_struct	= powernv_init_fadump_mem_struct,
+	.register_fadump	= powernv_register_fadump,
+	.unregister_fadump	= powernv_unregister_fadump,
+	.get_preserv_area_start	= powernv_get_preserv_area_start,
+	.get_meta_area_start	= powernv_get_meta_area_start,
+	.invalidate_fadump	= powernv_invalidate_fadump,
+	.process_fadump		= powernv_process_fadump,
+	.fadump_region_show	= powernv_fadump_region_show,
+	.crash_fadump		= powernv_crash_fadump,
+};
+
+int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
+{
+	/*
+	 * Firmware currently supports only 32-bit value for size,
+	 * align it to 1MB size.
+	 */
+	fadump_conf->max_copy_size = _ALIGN_DOWN(0xFFFFFFFF, (1 << 20));
+
+	/*
+	 * Check if dump has been initiated on last reboot.
+	 */
+	fdm_active = of_get_flat_dt_prop(node, "result-table", NULL);
+	if (fdm_active) {
+		pr_info("Firmware-assisted dump is active.\n");
+		fadump_conf->dump_active = 1;
+		update_fadump_config(fadump_conf, (void *)__pa(fdm_active));
+	}
+
+	fadump_ops = &powernv_fadump_ops;
+	fadump_conf->fadump_supported = 1;
+	fadump_conf->fadump_platform = FADUMP_PLATFORM_POWERNV;
+
+	return 1;
+}
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.h b/arch/powerpc/platforms/powernv/powernv_fadump.h
new file mode 100644
index 0000000..224a142
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.h
@@ -0,0 +1,63 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_POWERNV_FA_DUMP_H__
+#define __PPC64_POWERNV_FA_DUMP_H__
+
+#define POWERNV_FADUMP_CPU_STATE_DATA	0x0000
+/* OPAL : 0x01 - 0x39 */
+#define POWERNV_FADUMP_OPAL_REGION	0x0001
+/* Firmware/SMF : 0x40 - 0x79 */
+#define POWERNV_FADUMP_FW_REGION	0x0040
+/* Kernel memory region : 0x80 - 0xb9 */
+#define POWERNV_FADUMP_REAL_MODE_REGION	0x0080
+/* Reserved for future use : 0xc0 - 0xff */
+#define POWERNV_FADUMP_RESERVED_REGION	0x00c0
+
+enum powernv_fadump_section_types {
+	CPU_STATE_TYPE		= 0,
+	OPAL_REGION_TYPE,
+	FW_REGION_TYPE,
+	RMR_REGION_TYPE,
+	POWERNV_SECTIONS
+};
+
+/* Starting index of RMR region in dump sections while registering */
+#define RMR_REGION_INPUT_IDX		0
+
+#define POWERNV_MAX_SECTIONS		(POWERNV_SECTIONS + \
+					 MAX_REAL_MEM_REGIONS - 1)
+
+/* Kernel Dump section info */
+struct powernv_fadump_section {
+	u8	src_type;
+	u8	reserved[7];
+	__be64	src_addr;
+	__be64	src_size;
+	__be64	dest_addr;
+	__be64	dest_size;
+};
+
+/*
+ * Firmware-assisted dump memory structure. This structure is required for
+ * registering a future kernel dump with Power firmware through an OPAL call.
+ */
+struct powernv_fadump_mem_struct {
+
+	__be16	section_size;		/* sizeof(section[0]) */
+	__be16	section_count;		/* number of sections */
+	__be32	reserved;
+
+	struct powernv_fadump_section	section[POWERNV_MAX_SECTIONS];
+};
+
+#endif /* __PPC64_POWERNV_FA_DUMP_H__ */
diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.c b/arch/powerpc/platforms/pseries/pseries_fadump.c
index ac54501..ef7e59a 100644
--- a/arch/powerpc/platforms/pseries/pseries_fadump.c
+++ b/arch/powerpc/platforms/pseries/pseries_fadump.c
@@ -40,8 +40,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
 		be64_to_cpu(fdm->rmr_region.destination_address);
 
 	if (fadump_conf->dump_active) {
-		fadump_conf->rmr_source_len =
-			be64_to_cpu(fdm->rmr_region.source_len);
+		fadump_conf->rmr_src_addr[0] =
+			be64_to_cpu(fdm->rmr_region.source_address);
+		fadump_conf->rmr_src_size[0] = be64_to_cpu(fdm->rmr_region.source_len);
+		fadump_conf->rmr_regions_cnt = 1;
+		fadump_conf->rmr_source_len = fadump_conf->rmr_src_size[0];
+		fadump_conf->boot_memory_hole_size = 0;
 	}
 }
 

  parent reply	other threads:[~2018-05-15  4:59 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-15  4:58 [RFC PATCH 0/5] Add FADump support on PowerNV platform Hari Bathini
2018-05-15  4:58 ` [RFC PATCH 1/5] powerpc/fadump: move internal fadump code to a new file Hari Bathini
2018-05-15  4:59 ` [RFC PATCH 2/5] pseries/fadump: move out platform specific support from generic code Hari Bathini
2018-05-15  4:59 ` Hari Bathini [this message]
2018-05-15  4:59 ` [RFC PATCH 4/5] powerpc/fadump: process architected register state data provided by firmware Hari Bathini
2018-05-15  4:59 ` [RFC PATCH 5/5] powerpc/powernv: export /proc/opaldump for analysing opal crashes Hari Bathini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=152636035587.17123.1844308737319409343.stgit@hbathini.in.ibm.com \
    --to=hbathini@linux.ibm.com \
    --cc=ananth@linux.ibm.com \
    --cc=hbathini@linux.vnet.ibm.com \
    --cc=hegdevasant@linux.ibm.com \
    --cc=linuxppc-dev@ozlabs.org \
    --cc=mahesh@linux.ibm.com \
    --cc=mpe@ellerman.id.au \
    --cc=stewart@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.