* [RFC PATCH 1/5] powerpc/fadump: move internal fadump code to a new file
2018-05-15 4:58 [RFC PATCH 0/5] Add FADump support on PowerNV platform Hari Bathini
@ 2018-05-15 4:58 ` Hari Bathini
2018-05-15 4:59 ` [RFC PATCH 2/5] pseries/fadump: move out platform specific support from generic code Hari Bathini
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Hari Bathini @ 2018-05-15 4:58 UTC (permalink / raw)
To: Ananth N Mavinakayanahalli, Michael Ellerman,
Mahesh J Salgaonkar, Vasant Hegde, linuxppc-dev, Stewart Smith
Refactoring the fadump code means that its internal code will be referenced
from different places. To make that easier, move the internal code to a new file.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
arch/powerpc/include/asm/fadump.h | 114 -------------------
arch/powerpc/kernel/Makefile | 2
arch/powerpc/kernel/fadump.c | 190 ++------------------------------
arch/powerpc/kernel/fadump_internal.c | 194 +++++++++++++++++++++++++++++++++
arch/powerpc/kernel/fadump_internal.h | 127 ++++++++++++++++++++++
5 files changed, 333 insertions(+), 294 deletions(-)
create mode 100644 arch/powerpc/kernel/fadump_internal.c
create mode 100644 arch/powerpc/kernel/fadump_internal.h
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 1771134..028a8ef 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -24,34 +24,6 @@
#ifdef CONFIG_FA_DUMP
-/*
- * The RMA region will be saved for later dumping when kernel crashes.
- * RMA is Real Mode Area, the first block of logical memory address owned
- * by logical partition, containing the storage that may be accessed with
- * translate off.
- */
-#define RMA_START 0x0
-#define RMA_END (ppc64_rma_size)
-
-/*
- * On some Power systems where RMO is 128MB, it still requires minimum of
- * 256MB for kernel to boot successfully. When kdump infrastructure is
- * configured to save vmcore over network, we run into OOM issue while
- * loading modules related to network setup. Hence we need aditional 64M
- * of memory to avoid OOM issue.
- */
-#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
- + (0x1UL << 26))
-
-/* The upper limit percentage for user specified boot memory size (25%) */
-#define MAX_BOOT_MEM_RATIO 4
-
-#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
-
-/* Alignement per CMA requirement. */
-#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
- max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
-
/* Firmware provided dump sections */
#define FADUMP_CPU_STATE_DATA 0x0001
#define FADUMP_HPTE_REGION 0x0002
@@ -60,18 +32,9 @@
/* Dump request flag */
#define FADUMP_REQUEST_FLAG 0x00000001
-/* FAD commands */
-#define FADUMP_REGISTER 1
-#define FADUMP_UNREGISTER 2
-#define FADUMP_INVALIDATE 3
-
/* Dump status flag */
#define FADUMP_ERROR_FLAG 0x2000
-#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
-
-#define CPU_UNKNOWN (~((u32)0))
-
/* Utility macros */
#define SKIP_TO_NEXT_CPU(reg_entry) \
({ \
@@ -125,58 +88,8 @@ struct fadump_mem_struct {
struct fadump_section rmr_region;
};
-/* Firmware-assisted dump configuration details. */
-struct fw_dump {
- unsigned long cpu_state_data_size;
- unsigned long hpte_region_size;
- unsigned long boot_memory_size;
- unsigned long reserve_dump_area_start;
- unsigned long reserve_dump_area_size;
- /* cmd line option during boot */
- unsigned long reserve_bootvar;
-
- unsigned long fadumphdr_addr;
- unsigned long cpu_notes_buf;
- unsigned long cpu_notes_buf_size;
-
- int ibm_configure_kernel_dump;
-
- unsigned long fadump_enabled:1;
- unsigned long fadump_supported:1;
- unsigned long dump_active:1;
- unsigned long dump_registered:1;
-};
-
-/*
- * Copy the ascii values for first 8 characters from a string into u64
- * variable at their respective indexes.
- * e.g.
- * The string "FADMPINF" will be converted into 0x4641444d50494e46
- */
-static inline u64 str_to_u64(const char *str)
-{
- u64 val = 0;
- int i;
-
- for (i = 0; i < sizeof(val); i++)
- val = (*str) ? (val << 8) | *str++ : val << 8;
- return val;
-}
-#define STR_TO_HEX(x) str_to_u64(x)
-#define REG_ID(x) str_to_u64(x)
-
-#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
-/* The firmware-assisted dump format.
- *
- * The register save area is an area in the partition's memory used to preserve
- * the register contents (CPU state data) for the active CPUs during a firmware
- * assisted dump. The dump format contains register save area header followed
- * by register entries. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND".
- */
-
/* Register save area header. */
struct fadump_reg_save_area_header {
__be64 magic_number;
@@ -184,32 +97,9 @@ struct fadump_reg_save_area_header {
__be32 num_cpu_offset;
};
-/* Register entry. */
-struct fadump_reg_entry {
- __be64 reg_id;
- __be64 reg_value;
-};
-
-/* fadump crash info structure */
-struct fadump_crash_info_header {
- u64 magic_number;
- u64 elfcorehdr_addr;
- u32 crashing_cpu;
- struct pt_regs regs;
- struct cpumask online_mask;
-};
-
-/* Crash memory ranges */
-#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2)
-
-struct fad_crash_memory_ranges {
- unsigned long long base;
- unsigned long long size;
-};
-
extern int is_fadump_memory_area(u64 addr, ulong size);
-extern int early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data);
+extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data);
extern int fadump_reserve_mem(void);
extern int setup_fadump(void);
extern int is_fadump_active(void);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2b4c40b2..c653ecd 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -61,7 +61,7 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_FA_DUMP) += fadump.o
+obj-$(CONFIG_FA_DUMP) += fadump.o fadump_internal.o
ifeq ($(CONFIG_PPC32),y)
obj-$(CONFIG_E500) += idle_e500.o
endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 91e10cb..75cad00 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -43,6 +43,8 @@
#include <asm/fadump.h>
#include <asm/setup.h>
+#include "fadump_internal.h"
+
static struct fw_dump fw_dump;
static struct fadump_mem_struct fdm;
static const struct fadump_mem_struct *fdm_active;
@@ -109,8 +111,8 @@ static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
{
const __be32 *sections;
int i, num_sections;
@@ -201,68 +203,6 @@ int is_fadump_active(void)
return fw_dump.dump_active;
}
-/*
- * Returns 1, if there are no holes in boot memory area,
- * 0 otherwise.
- */
-static int is_boot_memory_area_contiguous(void)
-{
- struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(RMA_START);
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
- unsigned int ret = 0;
-
- for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart < tend) {
- /* Memory hole from start_pfn to tstart */
- if (tstart > start_pfn)
- break;
-
- if (tend == end_pfn) {
- ret = 1;
- break;
- }
-
- start_pfn = tend + 1;
- }
- }
-
- return ret;
-}
-
-/*
- * Returns 1, if there are no holes in reserved memory area,
- * 0 otherwise.
- */
-static int is_reserved_memory_area_contiguous(void)
-{
- struct memblock_region *reg;
- unsigned long start, end;
- unsigned long d_start = fw_dump.reserve_dump_area_start;
- unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
- int ret = 0;
-
- for_each_memblock(memory, reg) {
- start = max(d_start, (unsigned long)reg->base);
- end = min(d_end, (unsigned long)(reg->base + reg->size));
- if (d_start < end) {
- /* Memory hole from d_start to start */
- if (start > d_start)
- break;
-
- if (end == d_end) {
- ret = 1;
- break;
- }
- d_start = end + 1;
- }
- }
- return ret;
-}
-
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
@@ -622,10 +562,10 @@ static int register_fw_dump(struct fadump_mem_struct *fdm)
" dump. Hardware Error(%d).\n", rc);
break;
case -3:
- if (!is_boot_memory_area_contiguous())
+ if (!is_boot_memory_area_contiguous(&fw_dump))
pr_err("Can't have holes in boot memory area while "
"registering fadump\n");
- else if (!is_reserved_memory_area_contiguous())
+ else if (!is_reserved_memory_area_contiguous(&fw_dump))
pr_err("Can't have holes in reserved memory area while"
" registering fadump\n");
@@ -695,52 +635,6 @@ void crash_fadump(struct pt_regs *regs, const char *str)
rtas_os_term((char *)str);
}
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
static struct fadump_reg_entry*
fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
{
@@ -755,72 +649,6 @@ fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
return reg_entry;
}
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
-{
- struct elf_prstatus prstatus;
-
- memset(&prstatus, 0, sizeof(prstatus));
- /*
- * FIXME: How do i get PID? Do I really need it?
- * prstatus.pr_pid = ????
- */
- elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
- &prstatus, sizeof(prstatus));
- return buf;
-}
-
-static void fadump_update_elfcore_header(char *bufp)
-{
- struct elfhdr *elf;
- struct elf_phdr *phdr;
-
- elf = (struct elfhdr *)bufp;
- bufp += sizeof(struct elfhdr);
-
- /* First note is a place holder for cpu notes info. */
- phdr = (struct elf_phdr *)bufp;
-
- if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
- phdr->p_offset = phdr->p_paddr;
- phdr->p_filesz = fw_dump.cpu_notes_buf_size;
- phdr->p_memsz = fw_dump.cpu_notes_buf_size;
- }
- return;
-}
-
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
-{
- void *vaddr;
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
- if (!vaddr)
- return NULL;
-
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- SetPageReserved(page + i);
- return vaddr;
-}
-
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
-{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
-}
-
/*
* Read CPU state dump data and convert it into ELF notes.
* The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
@@ -910,9 +738,9 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
final_note(note_buf);
if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+ addr = fdh->elfcorehdr_addr;
+ pr_debug("Updating elfcore header(%lx) with cpu notes\n", addr);
+ fadump_update_elfcore_header(&fw_dump, (char *)__va(addr));
}
return 0;
diff --git a/arch/powerpc/kernel/fadump_internal.c b/arch/powerpc/kernel/fadump_internal.c
new file mode 100644
index 0000000..8259f22
--- /dev/null
+++ b/arch/powerpc/kernel/fadump_internal.c
@@ -0,0 +1,194 @@
+/*
+ * Firmware-Assisted Dump internal code.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/memblock.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/crash_core.h>
+
+#include "fadump_internal.h"
+
+void *fadump_cpu_notes_buf_alloc(unsigned long size)
+{
+ void *vaddr;
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ if (!vaddr)
+ return NULL;
+
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ SetPageReserved(page + i);
+ return vaddr;
+}
+
+void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+{
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ ClearPageReserved(page + i);
+ __free_pages(page, order);
+}
+
+#define GPR_MASK 0xffffff0000000000
+static inline int fadump_gpr_index(u64 id)
+{
+ int i = -1;
+ char str[3];
+
+ if ((id & GPR_MASK) == REG_ID("GPR")) {
+ /* get the digits at the end */
+ id &= ~GPR_MASK;
+ id >>= 24;
+ str[2] = '\0';
+ str[1] = id & 0xff;
+ str[0] = (id >> 8) & 0xff;
+ if (kstrtoint(str, 10, &i))
+ i = -EINVAL;
+ if (i > 31)
+ i = -1;
+ }
+ return i;
+}
+
+void fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+ int i;
+
+ i = fadump_gpr_index(reg_id);
+ if (i >= 0)
+ regs->gpr[i] = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("NIA"))
+ regs->nip = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("MSR"))
+ regs->msr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("CTR"))
+ regs->ctr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("LR"))
+ regs->link = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("XER"))
+ regs->xer = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("CR"))
+ regs->ccr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("DAR"))
+ regs->dar = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("DSISR"))
+ regs->dsisr = (unsigned long)reg_val;
+}
+
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+{
+ struct elf_prstatus prstatus;
+
+ memset(&prstatus, 0, sizeof(prstatus));
+ /*
+ * FIXME: How do i get PID? Do I really need it?
+ * prstatus.pr_pid = ????
+ */
+ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+ return buf;
+}
+
+void fadump_update_elfcore_header(struct fw_dump *fadump_conf, char *bufp)
+{
+ struct elfhdr *elf;
+ struct elf_phdr *phdr;
+
+ elf = (struct elfhdr *)bufp;
+ bufp += sizeof(struct elfhdr);
+
+ /* First note is a place holder for cpu notes info. */
+ phdr = (struct elf_phdr *)bufp;
+
+ if (phdr->p_type == PT_NOTE) {
+ phdr->p_paddr = fadump_conf->cpu_notes_buf;
+ phdr->p_offset = phdr->p_paddr;
+ phdr->p_memsz = fadump_conf->cpu_notes_buf_size;
+ phdr->p_filesz = phdr->p_memsz;
+ }
+}
+
+/*
+ * Returns 1, if there are no holes in boot memory area,
+ * 0 otherwise.
+ */
+int is_boot_memory_area_contiguous(struct fw_dump *fadump_conf)
+{
+ struct memblock_region *reg;
+ unsigned long tstart, tend;
+ unsigned long start_pfn = PHYS_PFN(RMA_START);
+ unsigned long end_pfn = PHYS_PFN(RMA_START +
+ fadump_conf->boot_memory_size);
+ unsigned int ret = 0;
+
+ for_each_memblock(memory, reg) {
+ tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
+ tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ if (tstart < tend) {
+ /* Memory hole from start_pfn to tstart */
+ if (tstart > start_pfn)
+ break;
+
+ if (tend == end_pfn) {
+ ret = 1;
+ break;
+ }
+
+ start_pfn = tend + 1;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Returns 1, if there are no holes in reserved memory area,
+ * 0 otherwise.
+ */
+int is_reserved_memory_area_contiguous(struct fw_dump *fadump_conf)
+{
+ struct memblock_region *reg;
+ unsigned long start, end;
+ unsigned long d_start = fadump_conf->reserve_dump_area_start;
+ unsigned long d_end = d_start + fadump_conf->reserve_dump_area_size;
+ int ret = 0;
+
+ for_each_memblock(memory, reg) {
+ start = max_t(unsigned long, d_start, reg->base);
+ end = min_t(unsigned long, d_end, (reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
+ break;
+
+ if (end == d_end) {
+ ret = 1;
+ break;
+ }
+
+ d_start = end + 1;
+ }
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h
new file mode 100644
index 0000000..4c26eda
--- /dev/null
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -0,0 +1,127 @@
+/*
+ * Firmware-Assisted Dump internal code.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_FA_DUMP_INTERNAL_H__
+#define __PPC64_FA_DUMP_INTERNAL_H__
+
+/*
+ * The RMA region will be saved for later dumping when kernel crashes.
+ * RMA is Real Mode Area, the first block of logical memory address owned
+ * by logical partition, containing the storage that may be accessed with
+ * translate off.
+ */
+#define RMA_START 0x0
+#define RMA_END (ppc64_rma_size)
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully. When kdump infrastructure is
+ * configured to save vmcore over network, we run into OOM issue while
+ * loading modules related to network setup. Hence we need additional 64M
+ * of memory to avoid OOM issue.
+ */
+#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ + (0x1UL << 26))
+
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
+#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+
+/* Alignment per CMA requirement. */
+#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
+ max_t(unsigned long, MAX_ORDER - 1, \
+ pageblock_order))
+
+/* FAD commands */
+#define FADUMP_REGISTER 1
+#define FADUMP_UNREGISTER 2
+#define FADUMP_INVALIDATE 3
+
+#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
+
+#define CPU_UNKNOWN (~((u32)0))
+
+/*
+ * Copy the ascii values for first 8 characters from a string into u64
+ * variable at their respective indexes.
+ * e.g.
+ * The string "FADMPINF" will be converted into 0x4641444d50494e46
+ */
+static inline u64 str_to_u64(const char *str)
+{
+ u64 val = 0;
+ int i;
+
+ for (i = 0; i < sizeof(val); i++)
+ val = (*str) ? (val << 8) | *str++ : val << 8;
+ return val;
+}
+#define STR_TO_HEX(x) str_to_u64(x)
+#define REG_ID(x) str_to_u64(x)
+
+#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
+
+/* Register entry. */
+struct fadump_reg_entry {
+ __be64 reg_id;
+ __be64 reg_value;
+};
+
+/* fadump crash info structure */
+struct fadump_crash_info_header {
+ u64 magic_number;
+ u64 elfcorehdr_addr;
+ u32 crashing_cpu;
+ struct pt_regs regs;
+ struct cpumask online_mask;
+};
+
+/* Crash memory ranges */
+#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2)
+
+struct fad_crash_memory_ranges {
+ unsigned long long base;
+ unsigned long long size;
+};
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ unsigned long cpu_state_data_size;
+ unsigned long hpte_region_size;
+ unsigned long boot_memory_size;
+ unsigned long reserve_dump_area_start;
+ unsigned long reserve_dump_area_size;
+ /* cmd line option during boot */
+ unsigned long reserve_bootvar;
+
+ unsigned long fadumphdr_addr;
+ unsigned long cpu_notes_buf;
+ unsigned long cpu_notes_buf_size;
+
+ int ibm_configure_kernel_dump;
+
+ unsigned long fadump_enabled:1;
+ unsigned long fadump_supported:1;
+ unsigned long dump_active:1;
+ unsigned long dump_registered:1;
+};
+
+int is_boot_memory_area_contiguous(struct fw_dump *fadump_conf);
+int is_reserved_memory_area_contiguous(struct fw_dump *fadump_conf);
+void *fadump_cpu_notes_buf_alloc(unsigned long size);
+void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size);
+void fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val);
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
+void fadump_update_elfcore_header(struct fw_dump *fadump_config, char *bufp);
+
+#endif /* __PPC64_FA_DUMP_INTERNAL_H__ */
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [RFC PATCH 2/5] pseries/fadump: move out platform specific support from generic code
2018-05-15 4:58 [RFC PATCH 0/5] Add FADump support on PowerNV platform Hari Bathini
2018-05-15 4:58 ` [RFC PATCH 1/5] powerpc/fadump: move internal fadump code to a new file Hari Bathini
@ 2018-05-15 4:59 ` Hari Bathini
2018-05-15 4:59 ` [RFC PATCH 3/5] powerpc/fadump: enable fadump support on powernv platform Hari Bathini
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Hari Bathini @ 2018-05-15 4:59 UTC (permalink / raw)
To: Ananth N Mavinakayanahalli, Michael Ellerman,
Mahesh J Salgaonkar, Vasant Hegde, linuxppc-dev, Stewart Smith
Introduce callbacks for platform-specific operations such as register,
unregister and invalidate, and move the pseries-specific code into
platform code.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
arch/powerpc/include/asm/fadump.h | 71 ---
arch/powerpc/kernel/fadump.c | 502 ++--------------------
arch/powerpc/kernel/fadump_internal.h | 33 +
arch/powerpc/platforms/pseries/Makefile | 1
arch/powerpc/platforms/pseries/pseries_fadump.c | 528 +++++++++++++++++++++++
arch/powerpc/platforms/pseries/pseries_fadump.h | 96 ++++
6 files changed, 696 insertions(+), 535 deletions(-)
create mode 100644 arch/powerpc/platforms/pseries/pseries_fadump.c
create mode 100644 arch/powerpc/platforms/pseries/pseries_fadump.h
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 028a8ef..db9465f 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -24,79 +24,8 @@
#ifdef CONFIG_FA_DUMP
-/* Firmware provided dump sections */
-#define FADUMP_CPU_STATE_DATA 0x0001
-#define FADUMP_HPTE_REGION 0x0002
-#define FADUMP_REAL_MODE_REGION 0x0011
-
-/* Dump request flag */
-#define FADUMP_REQUEST_FLAG 0x00000001
-
-/* Dump status flag */
-#define FADUMP_ERROR_FLAG 0x2000
-
-/* Utility macros */
-#define SKIP_TO_NEXT_CPU(reg_entry) \
-({ \
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) \
- reg_entry++; \
- reg_entry++; \
-})
-
extern int crashing_cpu;
-/* Kernel Dump section info */
-struct fadump_section {
- __be32 request_flag;
- __be16 source_data_type;
- __be16 error_flags;
- __be64 source_address;
- __be64 source_len;
- __be64 bytes_dumped;
- __be64 destination_address;
-};
-
-/* ibm,configure-kernel-dump header. */
-struct fadump_section_header {
- __be32 dump_format_version;
- __be16 dump_num_sections;
- __be16 dump_status_flag;
- __be32 offset_first_dump_section;
-
- /* Fields for disk dump option. */
- __be32 dd_block_size;
- __be64 dd_block_offset;
- __be64 dd_num_blocks;
- __be32 dd_offset_disk_path;
-
- /* Maximum time allowed to prevent an automatic dump-reboot. */
- __be32 max_time_auto;
-};
-
-/*
- * Firmware Assisted dump memory structure. This structure is required for
- * registering future kernel dump with power firmware through rtas call.
- *
- * No disk dump option. Hence disk dump path string section is not included.
- */
-struct fadump_mem_struct {
- struct fadump_section_header header;
-
- /* Kernel dump sections */
- struct fadump_section cpu_state_data;
- struct fadump_section hpte_region;
- struct fadump_section rmr_region;
-};
-
-#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
-
-/* Register save area header. */
-struct fadump_reg_save_area_header {
- __be64 magic_number;
- __be32 version;
- __be32 num_cpu_offset;
-};
-
extern int is_fadump_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
int depth, void *data);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 75cad00..88fafe1 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -39,19 +39,19 @@
#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
#include <asm/setup.h>
#include "fadump_internal.h"
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
#ifdef CONFIG_CMA
static struct cma *fadump_cma;
#endif
+/* Firmware-Assisted Dump operations for this machine */
+struct fadump_ops_t *fadump_ops;
+
static DEFINE_MUTEX(fadump_mutex);
struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
int crash_mem_ranges;
@@ -114,63 +114,13 @@ static int __init fadump_cma_init(void) { return 1; }
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
int depth, void *data)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const __be32 *token;
-
- if (depth != 1 || strcmp(uname, "rtas") != 0)
+ if (depth != 1)
return 0;
- /*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
- */
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
- return 1;
-
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
-
- /*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
- */
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
-
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, a 32bit cell which defines
- * the ID of a supported section followed by two 32 bit cells which
- * gives teh size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
-
- if (!sections)
- return 1;
-
- num_sections = size / (3 * sizeof(u32));
-
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
+ if (strcmp(uname, "rtas") == 0)
+ return pseries_dt_scan_fadump(&fw_dump, node);
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(&sections[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(&sections[1], 2);
- break;
- }
- }
-
- return 1;
+ return 0;
}
/*
@@ -222,61 +172,6 @@ static void fadump_show_config(void)
pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
}
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
- fdm->header.dump_num_sections = cpu_to_be16(3);
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.boot_memory_size;
-
- return addr;
-}
-
/**
* fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
*
@@ -407,16 +302,19 @@ int __init fadump_reserve_mem(void)
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
+ if (fw_dump.dump_active)
+ fw_dump.boot_memory_size = fw_dump.rmr_source_len;
else {
fw_dump.boot_memory_size = fadump_calculate_reserve_size();
#ifdef CONFIG_CMA
fw_dump.boot_memory_size = ALIGN(fw_dump.boot_memory_size,
- FADUMP_CMA_ALIGNMENT);
+ FADUMP_CMA_ALIGNMENT);
#endif
+ fw_dump.rmr_source_len = fw_dump.boot_memory_size;
}
+ size = get_fadump_area_size();
+
/*
* Calculate the memory boundary.
* If memory_limit is less than actual memory boundary then reserve
@@ -425,7 +323,6 @@ int __init fadump_reserve_mem(void)
* specified memory_limit.
*/
if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
- size = get_fadump_area_size();
if ((memory_limit + size) < memblock_end_of_DRAM())
memory_limit += size;
else
@@ -439,8 +336,6 @@ int __init fadump_reserve_mem(void)
memory_boundary = memblock_end_of_DRAM();
if (fw_dump.dump_active) {
- pr_info("Firmware-assisted dump is active.\n");
-
#ifdef CONFIG_HUGETLB_PAGE
/*
* FADump capture kernel doesn't care much about hugepages.
@@ -460,14 +355,11 @@ int __init fadump_reserve_mem(void)
fadump_reserve_crash_area(base, size);
fw_dump.fadumphdr_addr =
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
- be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
+ fadump_ops->get_meta_area_start(&fw_dump);
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
fw_dump.reserve_dump_area_start = base;
fw_dump.reserve_dump_area_size = size;
} else {
- size = get_fadump_area_size();
-
/*
* Reserve memory at an offset closer to bottom of the RAM to
* minimize the impact of memory hot-remove operation. We can't
@@ -533,62 +425,6 @@ static int __init early_fadump_reserve_mem(char *p)
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static int register_fw_dump(struct fadump_mem_struct *fdm)
-{
- int rc, err;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
-
- } while (wait_time);
-
- err = -EIO;
- switch (rc) {
- default:
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
- break;
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- if (!is_boot_memory_area_contiguous(&fw_dump))
- pr_err("Can't have holes in boot memory area while "
- "registering fadump\n");
- else if (!is_reserved_memory_area_contiguous(&fw_dump))
- pr_err("Can't have holes in reserved memory area while"
- " registering fadump\n");
-
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- err = -EINVAL;
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- err = -EEXIST;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- err = 0;
- break;
- }
- return err;
-}
-
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
@@ -631,173 +467,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fdh->online_mask = *cpu_online_mask;
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
- reg_entry++;
- }
- reg_entry++;
- return reg_entry;
-}
-
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
-{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
-
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
- /* Allocate buffer to hold cpu crash notes. */
- fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
- fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
- return -ENOMEM;
- }
- fw_dump.cpu_notes_buf = __pa(note_buf);
-
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
-
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
-
- for (i = 0; i < num_cpus; i++) {
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, &regs);
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- }
- }
- final_note(note_buf);
-
- if (fdh) {
- addr = fdh->elfcorehdr_addr;
- pr_debug("Updating elfcore header(%lx) with cpu notes\n", addr);
- fadump_update_elfcore_header(&fw_dump, (char *)__va(addr));
- }
- return 0;
-
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- return rc;
-
-}
-
-/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
- */
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
-{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
- }
-
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
-
- return 0;
+ fadump_ops->crash_fadump(str);
}
static inline void fadump_add_crash_memory(unsigned long long base,
@@ -917,7 +587,7 @@ static void fadump_setup_crash_memory_ranges(void)
static inline unsigned long fadump_relocate(unsigned long paddr)
{
if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
+ return fw_dump.rmr_destination_addr + paddr;
else
return paddr;
}
@@ -990,7 +660,7 @@ static int fadump_create_elfcore_headers(char *bufp)
* to the specified destination_address. Hence set
* the correct offset.
*/
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
+ phdr->p_offset = fw_dump.rmr_destination_addr;
}
phdr->p_paddr = mbase;
@@ -1039,7 +709,8 @@ static int register_fadump(void)
fadump_setup_crash_memory_ranges();
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
+ addr = fadump_ops->get_meta_area_start(&fw_dump);
+
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
vaddr = __va(addr);
@@ -1048,72 +719,19 @@ static int register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- return register_fw_dump(&fdm);
-}
-
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Un-register firmware-assisted dump\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
- }
- fw_dump.dump_registered = 0;
- return 0;
-}
-
-static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Invalidating firmware-assisted dump registration\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
- return rc;
- }
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fadump_ops->register_fadump(&fw_dump);
}
void fadump_cleanup(void)
{
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- /* pass the same memory dump structure provided by platform */
- fadump_invalidate_dump(fdm_active);
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fadump_ops->invalidate_fadump(&fw_dump);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
- fadump_unregister_dump(&fdm);
+ fadump_ops->unregister_fadump(&fw_dump);
}
}
@@ -1195,7 +813,7 @@ static void fadump_invalidate_release_mem(void)
return;
}
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+ destination_address = fadump_ops->get_preserv_area_start(&fw_dump);
fadump_cleanup();
mutex_unlock(&fadump_mutex);
@@ -1221,8 +839,10 @@ static void fadump_invalidate_release_mem(void)
fw_dump.cpu_notes_buf = 0;
fw_dump.cpu_notes_buf_size = 0;
}
+
/* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fadump_ops->init_fadump_mem_struct(&fw_dump,
+ fw_dump.reserve_dump_area_start);
}
static ssize_t fadump_release_memory_store(struct kobject *kobj,
@@ -1273,7 +893,7 @@ static ssize_t fadump_register_store(struct kobject *kobj,
int ret = 0;
int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
if (kstrtoint(buf, 0, &input))
@@ -1286,8 +906,10 @@ static ssize_t fadump_register_store(struct kobject *kobj,
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fadump_ops->unregister_fadump(&fw_dump);
break;
case 1:
if (fw_dump.dump_registered == 1) {
@@ -1309,62 +931,13 @@ static ssize_t fadump_register_store(struct kobject *kobj,
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
+ fadump_ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
return 0;
}
@@ -1445,12 +1018,13 @@ int __init setup_fadump(void)
* if dump process fails then invalidate the registration
* and release memory before proceeding for re-registration.
*/
- if (process_fadump(fdm_active) < 0)
+ if (fadump_ops->process_fadump(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fadump_ops->init_fadump_mem_struct(&fw_dump,
+ fw_dump.reserve_dump_area_start);
fadump_init_files();
return 1;
diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h
index 4c26eda..3791da7 100644
--- a/arch/powerpc/kernel/fadump_internal.h
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -47,6 +47,9 @@
#define FADUMP_UNREGISTER 2
#define FADUMP_INVALIDATE 3
+/* Firmware-Assisted Dump platforms */
+#define FADUMP_PLATFORM_PSERIES 1
+
#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
#define CPU_UNKNOWN (~((u32)0))
@@ -108,14 +111,34 @@ struct fw_dump {
unsigned long cpu_notes_buf;
unsigned long cpu_notes_buf_size;
+ unsigned long rmr_source_len;
+ unsigned long rmr_destination_addr;
+
int ibm_configure_kernel_dump;
unsigned long fadump_enabled:1;
unsigned long fadump_supported:1;
unsigned long dump_active:1;
unsigned long dump_registered:1;
+ unsigned long fadump_platform:4;
+};
+
+struct fadump_ops_t {
+ ulong (*init_fadump_mem_struct)(struct fw_dump *fadump_config,
+ ulong addr);
+ int (*register_fadump)(struct fw_dump *fadump_config);
+ int (*unregister_fadump)(struct fw_dump *fadump_config);
+ ulong (*get_preserv_area_start)(struct fw_dump *fadump_conf);
+ ulong (*get_meta_area_start)(struct fw_dump *fadump_conf);
+ int (*invalidate_fadump)(struct fw_dump *fadump_config);
+ int (*process_fadump)(struct fw_dump *fadump_config);
+ void (*fadump_region_show)(struct fw_dump *fadump_config,
+ struct seq_file *m);
+ void (*crash_fadump)(const char *msg);
};
+extern struct fadump_ops_t *fadump_ops;
+
int is_boot_memory_area_contiguous(struct fw_dump *fadump_conf);
int is_reserved_memory_area_contiguous(struct fw_dump *fadump_conf);
void *fadump_cpu_notes_buf_alloc(unsigned long size);
@@ -124,4 +147,14 @@ void fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val);
u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
void fadump_update_elfcore_header(struct fw_dump *fadump_config, char *bufp);
+#ifdef CONFIG_PPC_PSERIES
+extern int pseries_dt_scan_fadump(struct fw_dump *fadump_config, ulong node);
+#else
+static inline int
+pseries_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
+{
+ return 1;
+}
+#endif
+
#endif /* __PPC64_FA_DUMP_INTERNAL_H__ */
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 13eede6..978585c 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
+obj-$(CONFIG_FA_DUMP) += pseries_fadump.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.c b/arch/powerpc/platforms/pseries/pseries_fadump.c
new file mode 100644
index 0000000..ac54501
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_fadump.c
@@ -0,0 +1,528 @@
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "pseries fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+
+#include "../../kernel/fadump_internal.h"
+#include "pseries_fadump.h"
+
+static struct pseries_fadump_mem_struct fdm;
+static const struct pseries_fadump_mem_struct *fdm_active;
+
+static void update_fadump_config(struct fw_dump *fadump_conf,
+ const struct pseries_fadump_mem_struct *fdm)
+{
+ fadump_conf->rmr_destination_addr =
+ be64_to_cpu(fdm->rmr_region.destination_address);
+
+ if (fadump_conf->dump_active) {
+ fadump_conf->rmr_source_len =
+ be64_to_cpu(fdm->rmr_region.source_len);
+ }
+}
+
+static ulong pseries_init_fadump_mem_struct(struct fw_dump *fadump_conf,
+ ulong addr)
+{
+ memset(&fdm, 0, sizeof(struct pseries_fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm.header.dump_format_version = cpu_to_be32(0x00000001);
+ fdm.header.dump_num_sections = cpu_to_be16(3);
+ fdm.header.dump_status_flag = 0;
+ fdm.header.offset_first_dump_section =
+ cpu_to_be32((u32)offsetof(struct pseries_fadump_mem_struct,
+ cpu_state_data));
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm.header.dd_block_size = 0;
+ fdm.header.dd_block_offset = 0;
+ fdm.header.dd_num_blocks = 0;
+ fdm.header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm.header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm.cpu_state_data.request_flag =
+ cpu_to_be32(PSERIES_FADUMP_REQUEST_FLAG);
+ fdm.cpu_state_data.source_data_type =
+ cpu_to_be16(PSERIES_FADUMP_CPU_STATE_DATA);
+ fdm.cpu_state_data.source_address = 0;
+ fdm.cpu_state_data.source_len =
+ cpu_to_be64(fadump_conf->cpu_state_data_size);
+ fdm.cpu_state_data.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->cpu_state_data_size;
+
+ /* hpte region section */
+ fdm.hpte_region.request_flag = cpu_to_be32(PSERIES_FADUMP_REQUEST_FLAG);
+ fdm.hpte_region.source_data_type =
+ cpu_to_be16(PSERIES_FADUMP_HPTE_REGION);
+ fdm.hpte_region.source_address = 0;
+ fdm.hpte_region.source_len =
+ cpu_to_be64(fadump_conf->hpte_region_size);
+ fdm.hpte_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->hpte_region_size;
+
+ /* RMA region section */
+ fdm.rmr_region.request_flag = cpu_to_be32(PSERIES_FADUMP_REQUEST_FLAG);
+ fdm.rmr_region.source_data_type =
+ cpu_to_be16(PSERIES_FADUMP_REAL_MODE_REGION);
+ fdm.rmr_region.source_address = cpu_to_be64(RMA_START);
+ fdm.rmr_region.source_len =
+ cpu_to_be64(fadump_conf->boot_memory_size);
+ fdm.rmr_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->boot_memory_size;
+
+ update_fadump_config(fadump_conf, &fdm);
+
+ return addr;
+}
+
+/*
+ * Register the dump layout held in the file-scope 'fdm' with firmware using
+ * the RTAS token stored in fadump_conf->ibm_configure_kernel_dump. The call
+ * is repeated for as long as rtas_busy_delay_time() asks for a delay.
+ *
+ * fadump_conf->dump_registered is set when firmware reports success (0) or
+ * that a dump is already registered (-9).
+ *
+ * Returns 0 on success, -EEXIST when already registered, -EINVAL on a
+ * parameter error (-3), and -EIO on a hardware error (-1) or any other
+ * status.
+ */
+static int pseries_register_fadump(struct fw_dump *fadump_conf)
+{
+	int rc, err;
+	unsigned int wait_time;
+
+	/* TODO: Add upper time limit for the delay */
+	do {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_REGISTER, &fdm,
+			       sizeof(struct pseries_fadump_mem_struct));
+
+		wait_time = rtas_busy_delay_time(rc);
+		if (wait_time)
+			mdelay(wait_time);
+
+	} while (wait_time);
+
+	/* Anything not handled explicitly below is reported as an I/O error. */
+	err = -EIO;
+	switch (rc) {
+	default:
+		pr_err("Failed to register. Unknown Error(%d).\n", rc);
+		break;
+	case -1:
+		pr_err("Failed to register. Hardware Error(%d).\n", rc);
+		break;
+	case -3:
+		/* Point at the likely cause before the generic message. */
+		if (!is_boot_memory_area_contiguous(fadump_conf))
+			pr_err("Can't have holes in boot memory area.\n");
+		else if (!is_reserved_memory_area_contiguous(fadump_conf))
+			pr_err("Can't have holes in reserved memory area.\n");
+
+		pr_err("Failed to register. Parameter Error(%d).\n", rc);
+		err = -EINVAL;
+		break;
+	case -9:
+		pr_err("Already registered!\n");
+		fadump_conf->dump_registered = 1;
+		err = -EEXIST;
+		break;
+	case 0:
+		/* Success is informational, not an error condition. */
+		pr_info("Registration is successful!\n");
+		fadump_conf->dump_registered = 1;
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static int pseries_unregister_fadump(struct fw_dump *fadump_conf)
+{
+ int rc = 0;
+ unsigned int wait_time;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_UNREGISTER, &fdm,
+ sizeof(struct pseries_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to un-register - unexpected error(%d).\n", rc);
+ return rc;
+ }
+
+ fadump_conf->dump_registered = 0;
+ return 0;
+}
+
+static ulong pseries_get_preserv_area_start(struct fw_dump *fadump_conf)
+{
+ const struct pseries_fadump_mem_struct *fdm_ptr;
+
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else
+ fdm_ptr = &fdm;
+
+ return be64_to_cpu(fdm_ptr->cpu_state_data.destination_address);
+}
+
+static ulong pseries_get_meta_area_start(struct fw_dump *fadump_conf)
+{
+ return (fadump_conf->rmr_destination_addr +
+ fadump_conf->rmr_source_len);
+}
+
+static int pseries_invalidate_fadump(struct fw_dump *fadump_conf)
+{
+ int rc = 0;
+ unsigned int wait_time;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_INVALIDATE, fdm_active,
+ sizeof(struct pseries_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
+ return rc;
+ }
+
+ fadump_conf->dump_active = 0;
+ fdm_active = NULL;
+ return 0;
+}
+
+static struct fadump_reg_entry*
+fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
+ fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+ be64_to_cpu(reg_entry->reg_value));
+ reg_entry++;
+ }
+ reg_entry++;
+ return reg_entry;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+ struct fadump_reg_save_area_header *reg_header;
+ struct fadump_reg_entry *reg_entry;
+ struct fadump_crash_info_header *fdh = NULL;
+ void *vaddr;
+ unsigned long addr;
+ u32 num_cpus, *note_buf;
+ struct pt_regs regs;
+ int i, rc = 0, cpu = 0;
+
+ addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
+ pr_err("Unable to read register save area.\n");
+ return -ENOENT;
+ }
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
+ pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
+
+ vaddr += be32_to_cpu(reg_header->num_cpu_offset);
+ num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct fadump_reg_entry *)vaddr;
+
+ /* Allocate buffer to hold cpu crash notes. */
+ fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+ fadump_conf->cpu_notes_buf_size =
+ PAGE_ALIGN(fadump_conf->cpu_notes_buf_size);
+ note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size);
+ if (!note_buf) {
+ pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n",
+ fadump_conf->cpu_notes_buf_size);
+ return -ENOMEM;
+ }
+ fadump_conf->cpu_notes_buf = __pa(note_buf);
+
+ pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+ (num_cpus * sizeof(note_buf_t)), note_buf);
+
+ if (fadump_conf->fadumphdr_addr)
+ fdh = __va(fadump_conf->fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
+ pr_err("Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
+ SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = fadump_read_registers(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ final_note(note_buf);
+
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(fadump_conf,
+ __va(fdh->elfcorehdr_addr));
+ }
+ return 0;
+
+error_out:
+ fadump_cpu_notes_buf_free((ulong)__va(fadump_conf->cpu_notes_buf),
+ fadump_conf->cpu_notes_buf_size);
+ fadump_conf->cpu_notes_buf = 0;
+ fadump_conf->cpu_notes_buf_size = 0;
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init pseries_process_fadump(struct fw_dump *fadump_conf)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fadump_conf->fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid. */
+ if ((be16_to_cpu(fdm_active->header.dump_status_flag) ==
+ PSERIES_FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ pr_err("Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ pr_err("Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fadump_conf->fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ pr_err("Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ if (!fdm_active->cpu_state_data.bytes_dumped)
+ return -EINVAL;
+
+ rc = fadump_build_cpu_notes(fadump_conf);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static void pseries_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+ const struct pseries_fadump_mem_struct *fdm_ptr;
+ const struct pseries_fadump_section *cpu_data_section;
+
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else
+ fdm_ptr = &fdm;
+
+ cpu_data_section = &(fdm_ptr->cpu_state_data);
+ seq_printf(m,
+ "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(cpu_data_section->destination_address),
+ be64_to_cpu(cpu_data_section->destination_address) +
+ be64_to_cpu(cpu_data_section->source_len) - 1,
+ be64_to_cpu(cpu_data_section->source_len),
+ be64_to_cpu(cpu_data_section->bytes_dumped));
+ seq_printf(m,
+ "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address),
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
+ be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
+ be64_to_cpu(fdm_ptr->hpte_region.source_len),
+ be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
+ seq_printf(m,
+ "DUMP:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->rmr_region.destination_address),
+ be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
+ be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
+ be64_to_cpu(fdm_ptr->rmr_region.source_len),
+ be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
+
+ if (!fdm_active ||
+ (fadump_conf->reserve_dump_area_start ==
+ be64_to_cpu(cpu_data_section->destination_address)))
+ return;
+
+ /* Dump is active. Show reserved memory region. */
+ seq_printf(m,
+ " :[%#016lx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ fadump_conf->reserve_dump_area_start,
+ be64_to_cpu(cpu_data_section->destination_address) - 1,
+ be64_to_cpu(cpu_data_section->destination_address) -
+ fadump_conf->reserve_dump_area_start,
+ be64_to_cpu(cpu_data_section->destination_address) -
+ fadump_conf->reserve_dump_area_start);
+}
+
+static void pseries_crash_fadump(const char *msg)
+{
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)msg);
+}
+
+
+static struct fadump_ops_t pseries_fadump_ops = {
+ .init_fadump_mem_struct = pseries_init_fadump_mem_struct,
+ .register_fadump = pseries_register_fadump,
+ .unregister_fadump = pseries_unregister_fadump,
+ .get_preserv_area_start = pseries_get_preserv_area_start,
+ .get_meta_area_start = pseries_get_meta_area_start,
+ .invalidate_fadump = pseries_invalidate_fadump,
+ .process_fadump = pseries_process_fadump,
+ .fadump_region_show = pseries_fadump_region_show,
+ .crash_fadump = pseries_crash_fadump,
+};
+
+int __init pseries_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
+{
+ const __be32 *sections;
+ int i, num_sections;
+ int size;
+ const __be32 *token;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return 1;
+
+ fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active) {
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ update_fadump_config(fadump_conf, fdm_active);
+ }
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives the size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return 1;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case PSERIES_FADUMP_CPU_STATE_DATA:
+ fadump_conf->cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case PSERIES_FADUMP_HPTE_REGION:
+ fadump_conf->hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+
+ fadump_ops = &pseries_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+ fadump_conf->fadump_platform = FADUMP_PLATFORM_PSERIES;
+
+ return 1;
+}
diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.h b/arch/powerpc/platforms/pseries/pseries_fadump.h
new file mode 100644
index 0000000..07226e6
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_fadump.h
@@ -0,0 +1,96 @@
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_PSERIES_FA_DUMP_H__
+#define __PPC64_PSERIES_FA_DUMP_H__
+
+/* Firmware provided dump sections */
+#define PSERIES_FADUMP_CPU_STATE_DATA 0x0001
+#define PSERIES_FADUMP_HPTE_REGION 0x0002
+#define PSERIES_FADUMP_REAL_MODE_REGION 0x0011
+
+/* Dump request flag */
+#define PSERIES_FADUMP_REQUEST_FLAG 0x00000001
+
+/* Dump status flag */
+#define PSERIES_FADUMP_ERROR_FLAG 0x2000
+
+/*
+ * Utility macro: advance 'reg_entry' past the register entries of the
+ * current CPU, i.e. up to and including the entry whose reg_id is "CPUEND".
+ *
+ * 'reg_entry' is evaluated several times and is modified in place, so it
+ * must be a plain modifiable pointer lvalue without side effects.
+ */
+#define SKIP_TO_NEXT_CPU(reg_entry)					\
+({									\
+	while (be64_to_cpu((reg_entry)->reg_id) != REG_ID("CPUEND"))	\
+		(reg_entry)++;						\
+	(reg_entry)++;							\
+})
+
+/*
+ * Kernel Dump section info.
+ *
+ * One entry per dump section inside struct pseries_fadump_mem_struct.
+ * Every field is declared big-endian (__be*), so convert with
+ * be*_to_cpu()/cpu_to_be*() on access.
+ */
+struct pseries_fadump_section {
+ /* presumably carries PSERIES_FADUMP_REQUEST_FLAG - TODO confirm */
+ __be32 request_flag;
+ /* presumably one of the PSERIES_FADUMP_* section ids - TODO confirm */
+ __be16 source_data_type;
+ /* presumably checked against PSERIES_FADUMP_ERROR_FLAG - TODO confirm */
+ __be16 error_flags;
+ __be64 source_address;
+ __be64 source_len;
+ __be64 bytes_dumped;
+ __be64 destination_address;
+};
+
+/*
+ * ibm,configure-kernel-dump header.
+ *
+ * First member of struct pseries_fadump_mem_struct. Every field is
+ * declared big-endian (__be*).
+ */
+struct pseries_fadump_section_header {
+ __be32 dump_format_version;
+ __be16 dump_num_sections;
+ __be16 dump_status_flag;
+ __be32 offset_first_dump_section;
+
+ /* Fields for disk dump option. */
+ __be32 dd_block_size;
+ __be64 dd_block_offset;
+ __be64 dd_num_blocks;
+ __be32 dd_offset_disk_path;
+
+ /* Maximum time allowed to prevent an automatic dump-reboot. */
+ __be32 max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ *
+ * Layout: the header followed by exactly three fixed dump sections, in
+ * the order declared below.
+ */
+struct pseries_fadump_mem_struct {
+ struct pseries_fadump_section_header header;
+
+ /* Kernel dump sections */
+ struct pseries_fadump_section cpu_state_data;
+ struct pseries_fadump_section hpte_region;
+ struct pseries_fadump_section rmr_region;
+};
+
+#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
+
+/* The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. On pseries, each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND".
+ */
+
+/*
+ * Register save area header.
+ *
+ * Every field is declared big-endian (__be*).
+ */
+struct fadump_reg_save_area_header {
+ /* presumably compared against REGSAVE_AREA_MAGIC - TODO confirm */
+ __be64 magic_number;
+ __be32 version;
+ __be32 num_cpu_offset;
+};
+
+#endif /* __PPC64_PSERIES_FA_DUMP_H__ */
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [RFC PATCH 3/5] powerpc/fadump: enable fadump support on powernv platform
2018-05-15 4:58 [RFC PATCH 0/5] Add FADump support on PowerNV platform Hari Bathini
2018-05-15 4:58 ` [RFC PATCH 1/5] powerpc/fadump: move internal fadump code to a new file Hari Bathini
2018-05-15 4:59 ` [RFC PATCH 2/5] pseries/fadump: move out platform specific support from generic code Hari Bathini
@ 2018-05-15 4:59 ` Hari Bathini
2018-05-15 4:59 ` [RFC PATCH 4/5] powerpc/fadump: process architected register state data provided by firmware Hari Bathini
2018-05-15 4:59 ` [RFC PATCH 5/5] powerpc/powernv: export /proc/opaldump for analysing opal crashes Hari Bathini
4 siblings, 0 replies; 6+ messages in thread
From: Hari Bathini @ 2018-05-15 4:59 UTC (permalink / raw)
To: Ananth N Mavinakayanahalli, Michael Ellerman,
Mahesh J Salgaonkar, Vasant Hegde, linuxppc-dev, Stewart Smith
Cc: Hari Bathini
From: Hari Bathini <hbathini@linux.vnet.ibm.com>
Firmware-assisted dump support is enabled for POWERNV platform in P9
firmware. Make the corresponding updates in kernel to enable fadump
support on POWERNV platform.
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
arch/powerpc/Kconfig | 2
arch/powerpc/include/asm/opal-api.h | 4
arch/powerpc/include/asm/opal.h | 1
arch/powerpc/kernel/fadump.c | 397 ++++++++++++++++++++---
arch/powerpc/kernel/fadump_internal.h | 27 ++
arch/powerpc/platforms/powernv/Makefile | 1
arch/powerpc/platforms/powernv/opal-wrappers.S | 1
arch/powerpc/platforms/powernv/powernv_fadump.c | 337 ++++++++++++++++++++
arch/powerpc/platforms/powernv/powernv_fadump.h | 63 ++++
arch/powerpc/platforms/pseries/pseries_fadump.c | 8
10 files changed, 783 insertions(+), 58 deletions(-)
create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.c
create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.h
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181..d749f1f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -599,7 +599,7 @@ config CRASH_DUMP
config FA_DUMP
bool "Firmware-assisted dump"
- depends on PPC64 && PPC_RTAS
+ depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
select CRASH_CORE
select CRASH_DUMP
help
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index d886a5b..75e8925 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -206,7 +206,8 @@
#define OPAL_NPU_TL_SET 161
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
-#define OPAL_LAST 165
+#define OPAL_CONFIGURE_FADUMP 167
+#define OPAL_LAST 167
/* Device tree flags */
@@ -1040,6 +1041,7 @@ enum OpalSysCooling {
enum {
OPAL_REBOOT_NORMAL = 0,
OPAL_REBOOT_PLATFORM_ERROR = 1,
+ OPAL_REBOOT_MPIPL = 3,
};
/* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 03e1a92..4c1f483 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -43,6 +43,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
uint64_t PE_handle);
int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
uint64_t rate_phys, uint32_t size);
+int64_t opal_configure_fadump(uint64_t command, void *data, uint64_t data_size);
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 88fafe1..a27e4af 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -53,8 +53,12 @@ static struct cma *fadump_cma;
struct fadump_ops_t *fadump_ops;
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+struct fadump_memory_range crash_memory_ranges[INIT_CRASHMEM_RANGES];
int crash_mem_ranges;
+struct fadump_memory_range reserved_ranges[INIT_MEMBLOCK_REGIONS];
+int reserved_ranges_cnt;
+struct fadump_memory_range memory_ranges[2 * INIT_MEMBLOCK_REGIONS];
+int memory_ranges_cnt;
#ifdef CONFIG_CMA
/*
@@ -120,6 +124,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
if (strcmp(uname, "rtas") == 0)
return pseries_dt_scan_fadump(&fw_dump, node);
+ if (strcmp(uname, "ibm,dump") == 0)
+ return powernv_dt_scan_fadump(&fw_dump, node);
+
return 0;
}
@@ -156,6 +163,8 @@ int is_fadump_active(void)
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
@@ -170,6 +179,13 @@ static void fadump_show_config(void)
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug("Real memory region hole size : %lx\n",
+ fw_dump.boot_memory_hole_size);
+ pr_debug("Real meory regions count : %lx\n", fw_dump.rmr_regions_cnt);
+ for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+ pr_debug("%d. RMR base = %lx, size = %lx\n", (i+1),
+ fw_dump.rmr_src_addr[i], fw_dump.rmr_src_size[i]);
+ }
}
/**
@@ -243,6 +259,157 @@ static inline unsigned long fadump_calculate_reserve_size(void)
return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
}
+/*
+ * Populate reserved_ranges[] from the "reserved-ranges" property of the
+ * device-tree root node.
+ *
+ * Zero-sized entries are skipped. The collected ranges are then sorted by
+ * base address, and ranges that are exactly adjacent (one ends where the
+ * next begins) are merged. reserved_ranges_cnt holds the final count.
+ *
+ * At most INIT_MEMBLOCK_REGIONS ranges, the capacity of reserved_ranges[],
+ * are recorded; any further entries are reported and ignored rather than
+ * written past the end of the array.
+ */
+static void __init fadump_get_reserved_ranges(void)
+{
+	struct fadump_memory_range tmp_range;
+	unsigned long dt_root;
+	const __be32 *prop;
+	int i, j, idx, len, nr_ranges;
+
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
+
+	dt_root = of_get_flat_dt_root();
+	prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+	if (!prop)
+		return;
+
+	/*
+	 * Each reserved range is an (address,size) pair, 2 cells each,
+	 * totalling 4 cells per range.
+	 */
+	nr_ranges = len / (sizeof(*prop) * 4);
+	for (i = 0; i < nr_ranges; i++) {
+		u64 base = of_read_number(prop + (i * 4) + 0, 2);
+		u64 size = of_read_number(prop + (i * 4) + 2, 2);
+
+		if (!size)
+			continue;
+
+		/* never write past the end of reserved_ranges[] */
+		if (reserved_ranges_cnt >= INIT_MEMBLOCK_REGIONS) {
+			pr_err("Too many reserved ranges; ignoring the rest\n");
+			break;
+		}
+
+		reserved_ranges[reserved_ranges_cnt].base = base;
+		reserved_ranges[reserved_ranges_cnt].size = size;
+		reserved_ranges_cnt++;
+	}
+
+	if (!reserved_ranges_cnt)
+		return;
+
+	/* Sort the reserved ranges by base address (selection sort) */
+	for (i = 0; i < reserved_ranges_cnt; i++) {
+		idx = i;
+		for (j = i + 1; j < reserved_ranges_cnt; j++) {
+			if (reserved_ranges[idx].base > reserved_ranges[j].base)
+				idx = j;
+		}
+		if (idx != i) {
+			tmp_range = reserved_ranges[idx];
+			reserved_ranges[idx] = reserved_ranges[i];
+			reserved_ranges[i] = tmp_range;
+		}
+	}
+
+	/*
+	 * Merge adjacent reserved ranges, compacting the array in place.
+	 * 'idx' is the last slot of the compacted output.
+	 */
+	idx = 0;
+	for (i = 1; i < reserved_ranges_cnt; i++) {
+		unsigned long long prev_end = reserved_ranges[i - 1].base +
+					      reserved_ranges[i - 1].size;
+
+		if (reserved_ranges[i].base == prev_end) {
+			reserved_ranges[idx].size += reserved_ranges[i].size;
+			continue;
+		}
+
+		idx++;
+		if (idx != i)
+			reserved_ranges[idx] = reserved_ranges[i];
+	}
+	reserved_ranges_cnt = idx + 1;
+}
+
+/*
+ * Append the range [base, base + size) to memory_ranges[].
+ *
+ * Zero-sized ranges are ignored. memory_ranges[] holds
+ * 2 * INIT_MEMBLOCK_REGIONS entries; once it is full, further ranges are
+ * reported and dropped instead of being written past the end of the array.
+ */
+static inline void fadump_add_memory_range(unsigned long long base,
+					   unsigned long long size)
+{
+	if (!size)
+		return;
+
+	if (memory_ranges_cnt >= 2 * INIT_MEMBLOCK_REGIONS) {
+		pr_err("Too many memory ranges; dropping [%#016llx-%#016llx)\n",
+		       base, base + size);
+		return;
+	}
+
+	pr_debug("memory_range[%d] [%#016llx-%#016llx), %#llx bytes\n",
+		 memory_ranges_cnt, base, base + size, size);
+	memory_ranges[memory_ranges_cnt].base = base;
+	memory_ranges[memory_ranges_cnt].size = size;
+	memory_ranges_cnt++;
+}
+
+/*
+ * Rebuild memory_ranges[]: the memblock memory regions with the ranges in
+ * reserved_ranges[] carved out.
+ *
+ * memory_ranges_cnt is reset to 0 and the list is rebuilt on every call.
+ * The walk looks ahead at reserved_ranges[j + 1], so it relies on
+ * reserved_ranges[] being sorted by base address.
+ *
+ * The memblock regions are first snapshotted into an on-stack array of
+ * INIT_MEMBLOCK_REGIONS entries; regions beyond that capacity are reported
+ * and ignored rather than written past the end of the array.
+ */
+static void fadump_setup_memory_ranges(void)
+{
+	struct fadump_memory_range tmp_ranges[INIT_MEMBLOCK_REGIONS];
+	unsigned long long base, end, size;
+	struct memblock_region *reg;
+	int tmp_ranges_cnt;
+	int i, j;
+
+	/* get memory ranges */
+	tmp_ranges_cnt = 0;
+	for_each_memblock(memory, reg) {
+		if (tmp_ranges_cnt >= INIT_MEMBLOCK_REGIONS) {
+			pr_err("Too many memory regions; ignoring the rest\n");
+			break;
+		}
+
+		tmp_ranges[tmp_ranges_cnt].base = (unsigned long long)reg->base;
+		tmp_ranges[tmp_ranges_cnt].size = (unsigned long long)reg->size;
+		tmp_ranges_cnt++;
+	}
+
+	/* exclude reserved ranges */
+	memory_ranges_cnt = 0;
+	for (i = 0; i < tmp_ranges_cnt; i++) {
+		unsigned long long mem_base, mem_end, rsrv_base, rsrv_end;
+		/* set while [mem_base, mem_end) still has to be added as is */
+		int add = 1;
+
+		base = mem_base = tmp_ranges[i].base;
+		end = mem_end = base + tmp_ranges[i].size;
+		for (j = 0; j < reserved_ranges_cnt; j++) {
+			rsrv_base = reserved_ranges[j].base;
+			rsrv_end = rsrv_base + reserved_ranges[j].size;
+
+			/* this reserved range lies below the current piece */
+			if (mem_base > rsrv_end)
+				continue;
+
+			/*
+			 * Clip the current piece at the start of the next
+			 * reserved range, so that each pass deals with at
+			 * most one reserved range.
+			 */
+			if ((j < (reserved_ranges_cnt - 1)) &&
+			    (reserved_ranges[j + 1].base < mem_end))
+				mem_end = reserved_ranges[j + 1].base;
+
+			if ((rsrv_base < mem_end) && (rsrv_end > mem_base)) {
+				/* overlap: keep only the parts outside it */
+				if ((mem_base < rsrv_base) &&
+				    (mem_end > rsrv_end)) {
+					size = rsrv_base - mem_base;
+					fadump_add_memory_range(mem_base, size);
+					size = mem_end - rsrv_end;
+					fadump_add_memory_range(rsrv_end, size);
+				} else if (mem_base < rsrv_base) {
+					size = rsrv_base - mem_base;
+					fadump_add_memory_range(mem_base, size);
+				} else if (mem_end > rsrv_end) {
+					size = mem_end - rsrv_end;
+					fadump_add_memory_range(rsrv_end, size);
+				}
+
+				add = 0;
+			}
+
+			/* the whole memblock region has been handled */
+			if (mem_end == end)
+				break;
+
+			/* continue with the remainder of the region */
+			mem_base = mem_end;
+			mem_end = end;
+			add = 1;
+		}
+
+		if (add)
+			fadump_add_memory_range(mem_base, mem_end - mem_base);
+	}
+}
+
+
/*
* Calculate the total memory size required to be reserved for
* firmware-assisted dump registration.
@@ -264,24 +431,113 @@ static unsigned long get_fadump_area_size(void)
return size;
}
-static void __init fadump_reserve_crash_area(unsigned long base,
- unsigned long size)
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(unsigned long base)
{
- struct memblock_region *reg;
- unsigned long mstart, mend, msize;
+ int i;
+ unsigned long mstart, msize;
- for_each_memblock(memory, reg) {
- mstart = max_t(unsigned long, base, reg->base);
- mend = reg->base + reg->size;
- mend = min(base + size, mend);
-
- if (mstart < mend) {
- msize = mend - mstart;
- memblock_reserve(mstart, msize);
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
- (msize >> 20), mstart);
+ for (i = 0; i < memory_ranges_cnt; i++) {
+ mstart = memory_ranges[i].base;
+ msize = memory_ranges[i].size;
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
}
+ pr_info("Reserving %luMB of memory at %#016lx for saving crash dump",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
+
+/*
+ * Record one real memory region in fw_dump.
+ *
+ * @rmr_start: start address of the region.
+ * @rmr_size:  size of the region in bytes.
+ *
+ * Returns 1 when the region was recorded, 0 when all MAX_REAL_MEM_REGIONS
+ * slots are already in use. The limit is checked before the count is
+ * bumped, so fw_dump.rmr_regions_cnt never exceeds the capacity of
+ * rmr_src_addr[]/rmr_src_size[], even after a failed call.
+ */
+static int __init add_rmr_region(unsigned long rmr_start,
+				 unsigned long rmr_size)
+{
+	int i = fw_dump.rmr_regions_cnt;
+
+	if (i >= MAX_REAL_MEM_REGIONS)
+		return 0;
+
+	pr_debug("Added real memory range[%d] [%#016lx-%#016lx)\n",
+		 i, rmr_start, (rmr_start + rmr_size));
+	fw_dump.rmr_src_addr[i] = rmr_start;
+	fw_dump.rmr_src_size[i] = rmr_size;
+	fw_dump.rmr_regions_cnt++;
+	return 1;
+}
+
+/*
+ * Platforms like PowerNV have an upper limit on the size.
+ * If 'rmr_size' is bigger than that limit, split this memory range
+ * into multiple entries.
+ *
+ * The limit is fw_dump.max_copy_size; when that is 0 the range is added
+ * as a single entry. Returns the result of the last add_rmr_region() call
+ * (0 as soon as one entry cannot be added), or 1 if 'rmr_size' is 0.
+ */
+static int __init add_rmr_regions(unsigned long rmr_start,
+ unsigned long rmr_size)
+{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = rmr_start;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : rmr_size;
+ while (rmr_size) {
+ /* take at most max_size bytes per entry */
+ if (rmr_size > max_size)
+ rsize = max_size;
+ else
+ rsize = rmr_size;
+
+ ret = add_rmr_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ rmr_size -= rsize;
+ rstart += rsize;
 }
+
+ return ret;
+}
+
+/*
+ * Work out the real memory regions that together make up
+ * fw_dump.boot_memory_size bytes of boot memory.
+ *
+ * Resets fw_dump.rmr_regions_cnt and fw_dump.boot_memory_hole_size, then
+ * adds regions through add_rmr_regions(). On pseries a single range
+ * starting at RMA_START is used; otherwise memory_ranges[] is walked in
+ * order and the gaps between consecutive ranges are accumulated in
+ * boot_memory_hole_size.
+ *
+ * Returns the add_rmr_regions() result: 0 if a region could not be added,
+ * 1 otherwise.
+ */
+static int __init fadump_get_rmr_regions(void)
+{
+ int i, ret = 1;
+ unsigned long base, size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
+
+ fw_dump.rmr_regions_cnt = 0;
+ fw_dump.boot_memory_hole_size = 0;
+
+ /*
+ * TODO: Extend support for multiple real memory regions on
+ * pseries platform too.
+ */
+ if (fw_dump.fadump_platform == FADUMP_PLATFORM_PSERIES) {
+ ret = add_rmr_regions(RMA_START, fw_dump.boot_memory_size);
+ return ret;
+ }
+
+ last_end = memory_ranges[0].base;
+ for (i = 0; i < memory_ranges_cnt; i++) {
+ base = memory_ranges[i].base;
+ size = memory_ranges[i].size;
+
+ /* gap between the previous range and this one */
+ if (base > last_end)
+ fw_dump.boot_memory_hole_size += (base - last_end);
+
+ /* this range covers what is left of the boot memory size */
+ if (size >= mem_size) {
+ ret = add_rmr_regions(base, mem_size);
+ break;
+ }
+
+ mem_size -= size;
+ ret = add_rmr_regions(base, size);
+ if (!ret)
+ break;
+
+ last_end = base + size;
+ }
+
+ return ret;
 }
int __init fadump_reserve_mem(void)
@@ -297,6 +553,10 @@ int __init fadump_reserve_mem(void)
fw_dump.fadump_enabled = 0;
return 0;
}
+
+ fadump_get_reserved_ranges();
+ fadump_setup_memory_ranges();
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
@@ -311,6 +571,11 @@ int __init fadump_reserve_mem(void)
FADUMP_CMA_ALIGNMENT);
#endif
fw_dump.rmr_source_len = fw_dump.boot_memory_size;
+ if (!fadump_get_rmr_regions()) {
+ fw_dump.fadump_enabled = 0;
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ return 0;
+ }
}
size = get_fadump_area_size();
@@ -335,6 +600,7 @@ int __init fadump_reserve_mem(void)
else
memory_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
if (fw_dump.dump_active) {
#ifdef CONFIG_HUGETLB_PAGE
/*
@@ -348,11 +614,9 @@ int __init fadump_reserve_mem(void)
* If last boot has crashed then reserve all the memory
* above boot_memory_size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- fadump_reserve_crash_area(base, size);
+ fadump_reserve_crash_area(base);
fw_dump.fadumphdr_addr =
fadump_ops->get_meta_area_start(&fw_dump);
@@ -366,23 +630,22 @@ int __init fadump_reserve_mem(void)
* use memblock_find_in_range() here since it doesn't allocate
* from bottom to top.
*/
- for (base = fw_dump.boot_memory_size;
- base <= (memory_boundary - size);
- base += size) {
+ while (base <= (memory_boundary - size)) {
if (memblock_is_region_memory(base, size) &&
!memblock_is_region_reserved(base, size))
break;
+
+ base += size;
}
+
if ((base > (memory_boundary - size)) ||
memblock_reserve(base, size)) {
pr_err("Failed to reserve memory\n");
return 0;
}
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
- "assisted dump (System RAM: %ldMB)\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20),
+ pr_info("Reserved %ldMB of memory at %#016lx (System RAM: %ldMB)\n",
+ (unsigned long)(size >> 20), base,
(unsigned long)(memblock_phys_mem_size() >> 20));
fw_dump.reserve_dump_area_start = base;
@@ -543,23 +806,28 @@ static int fadump_init_elfcore_header(char *bufp)
*/
static void fadump_setup_crash_memory_ranges(void)
{
- struct memblock_region *reg;
- unsigned long long start, end;
+ unsigned long long start, end, offset;
+ int i;
pr_debug("Setup crash memory ranges.\n");
crash_mem_ranges = 0;
+ offset = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
+
/*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
+ * Add real memory region(s) whose content is going to be moved to
+ * a different location, specified during fadump registration, by
+ * firmware at the time of crash. We need to create separate program
+ * header(s) for this memory chunk with the correct offset.
*/
- fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+ for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+ start = fw_dump.rmr_src_addr[i];
+ end = start + fw_dump.rmr_src_size[i];
+ fadump_add_crash_memory(start, end);
+ }
- for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
+ for (i = 0; i < memory_ranges_cnt; i++) {
+ start = memory_ranges[i].base;
+ end = start + memory_ranges[i].size;
/*
* skip the first memory chunk that is already added (RMA_START
@@ -567,9 +835,9 @@ static void fadump_setup_crash_memory_ranges(void)
* when RMA_START changes to a non-zero value.
*/
BUILD_BUG_ON(RMA_START != 0);
- if (start < fw_dump.boot_memory_size) {
- if (end > fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (start < offset) {
+ if (end > offset)
+ start = offset;
else
continue;
}
@@ -586,17 +854,32 @@ static void fadump_setup_crash_memory_ranges(void)
*/
static inline unsigned long fadump_relocate(unsigned long paddr)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return fw_dump.rmr_destination_addr + paddr;
- else
- return paddr;
+ unsigned long raddr, rstart, rend, offset;
+ int i;
+
+ offset = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+ rstart = fw_dump.rmr_src_addr[i];
+ rend = rstart + fw_dump.rmr_src_size[i];
+
+ if (paddr > rstart && paddr < rend) {
+ raddr += fw_dump.rmr_destination_addr + offset;
+ break;
+ }
+
+ offset += fw_dump.rmr_src_size[i];
+ }
+
+ return raddr;
}
static int fadump_create_elfcore_headers(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
- int i;
+ unsigned long long raddr, offset;
+ int i, j;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
@@ -639,9 +922,12 @@ static int fadump_create_elfcore_headers(char *bufp)
(elf->e_phnum)++;
/* setup PT_LOAD sections. */
-
+ j = 0;
+ offset = 0;
+ raddr = fw_dump.rmr_src_addr[0];
for (i = 0; i < crash_mem_ranges; i++) {
unsigned long long mbase, msize;
+
mbase = crash_memory_ranges[i].base;
msize = crash_memory_ranges[i].size;
@@ -654,13 +940,17 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = mbase;
- if (mbase == RMA_START) {
+ if (mbase == raddr) {
/*
* The entire RMA region will be moved by firmware
* to the specified destination_address. Hence set
* the correct offset.
*/
- phdr->p_offset = fw_dump.rmr_destination_addr;
+ phdr->p_offset = fw_dump.rmr_destination_addr + offset;
+ if (j < (fw_dump.rmr_regions_cnt - 1)) {
+ offset += fw_dump.rmr_src_size[j];
+ raddr = fw_dump.rmr_src_addr[++j];
+ }
}
phdr->p_paddr = mbase;
@@ -707,6 +997,7 @@ static int register_fadump(void)
if (!fw_dump.reserve_dump_area_size)
return -ENODEV;
+ fadump_setup_memory_ranges();
fadump_setup_crash_memory_ranges();
addr = fadump_ops->get_meta_area_start(&fw_dump);
@@ -822,14 +1113,14 @@ static void fadump_invalidate_release_mem(void)
* later for releasing the memory for general use.
*/
reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
+ reserved_area_end =
+ memory_limit ? memory_limit : memblock_end_of_DRAM();
+
/*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
+ * Setup reserve_dump_area_start so that we can reuse this
+ * reserved memory for Re-registration.
*/
fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
fadump_release_memory(reserved_area_start, reserved_area_end);
if (fw_dump.cpu_notes_buf) {
diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h
index 3791da7..eae4b55 100644
--- a/arch/powerpc/kernel/fadump_internal.h
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -49,6 +49,7 @@
/* Firmware-Assited Dump platforms */
#define FADUMP_PLATFORM_PSERIES 1
+#define FADUMP_PLATFORM_POWERNV 2
#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
@@ -92,11 +93,14 @@ struct fadump_crash_info_header {
/* Crash memory ranges */
#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2)
-struct fad_crash_memory_ranges {
+struct fadump_memory_range {
unsigned long long base;
unsigned long long size;
};
+/* Maximum no. of real memory regions supported by the kernel */
+#define MAX_REAL_MEM_REGIONS 6
+
/* Firmware-assisted dump configuration details. */
struct fw_dump {
unsigned long cpu_state_data_size;
@@ -114,6 +118,17 @@ struct fw_dump {
unsigned long rmr_source_len;
unsigned long rmr_destination_addr;
+ unsigned long boot_memory_hole_size;
+ unsigned long rmr_regions_cnt;
+ unsigned long rmr_src_addr[MAX_REAL_MEM_REGIONS];
+ unsigned long rmr_src_size[MAX_REAL_MEM_REGIONS];
+
+ /*
+ * Maximum size supported by firmware to copy from source to
+ * destination address per entry.
+ */
+ unsigned long max_copy_size;
+
int ibm_configure_kernel_dump;
unsigned long fadump_enabled:1;
@@ -157,4 +172,14 @@ pseries_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
}
#endif
+#ifdef CONFIG_PPC_POWERNV
+extern int powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node);
+#else
+/* Stub for builds without CONFIG_PPC_POWERNV: does nothing, returns 1. */
+static inline int
+powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
+{
+ return 1;
+}
+#endif
+
#endif /* __PPC64_FA_DUMP_INTERNAL_H__ */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 703a350..0d106b5 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,6 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP) += powernv_fadump.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3da30c2..20bbb9c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -325,3 +325,4 @@ OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_configure_fadump, OPAL_CONFIGURE_FADUMP);
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c b/arch/powerpc/platforms/powernv/powernv_fadump.c
new file mode 100644
index 0000000..6d4b515
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.c
@@ -0,0 +1,337 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "powernv fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/bug.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/opal.h>
+#include <asm/fadump.h>
+
+#include "../../kernel/fadump_internal.h"
+#include "powernv_fadump.h"
+
+static struct powernv_fadump_mem_struct fdm;
+static const struct powernv_fadump_mem_struct *fdm_active;
+unsigned long fdm_actual_size;
+
+/*
+ * Refresh 'fadump_conf' from a powernv fadump memory structure.
+ *
+ * @fadump_conf: fadump configuration to update.
+ * @fdm:         memory structure to read (note: shadows the file-level
+ *               'fdm' inside this function).
+ *
+ * Recomputes fdm_actual_size (the structure size without its unused
+ * trailing sections) and sets rmr_destination_addr to the destination of
+ * the first real-memory-region section, or 0 if there is none. When a dump
+ * is active, the source address and size of every real-memory-region
+ * section are copied into rmr_src_addr[]/rmr_src_size[], and
+ * rmr_source_len, boot_memory_hole_size and rmr_regions_cnt are rebuilt.
+ *
+ * A section count larger than POWERNV_MAX_SECTIONS is warned about and
+ * clamped, and at most MAX_REAL_MEM_REGIONS regions are recorded, so bad
+ * firmware data cannot index past the arrays involved.
+ */
+static void update_fadump_config(struct fw_dump *fadump_conf,
+				 const struct powernv_fadump_mem_struct *fdm)
+{
+	unsigned long base, size, last_end;
+	int section_cnt = be16_to_cpu(fdm->section_count);
+	int unused_sections;
+	int i, j;
+
+	pr_debug("section_cnt: %d\n", section_cnt);
+	/* do not trust a count beyond what the structure can hold */
+	if (WARN_ON(section_cnt > POWERNV_MAX_SECTIONS))
+		section_cnt = POWERNV_MAX_SECTIONS;
+
+	unused_sections = POWERNV_MAX_SECTIONS - section_cnt;
+	fdm_actual_size = sizeof(*fdm) -
+		(unused_sections * sizeof(struct powernv_fadump_section));
+
+	/*
+	 * The first real memory region entry is the real memory
+	 * regions destination address.
+	 */
+	fadump_conf->rmr_destination_addr = 0;
+	for (i = 0; i < section_cnt; i++) {
+		if (fdm->section[i].src_type ==
+		    POWERNV_FADUMP_REAL_MODE_REGION) {
+			fadump_conf->rmr_destination_addr =
+				be64_to_cpu(fdm->section[i].dest_addr);
+			break;
+		}
+	}
+	pr_debug("Destination address of real memory regions: %#016lx\n",
+		 fadump_conf->rmr_destination_addr);
+
+	if (!fadump_conf->dump_active)
+		return;
+
+	j = 0;
+	last_end = 0;
+	fadump_conf->rmr_source_len = 0;
+	fadump_conf->boot_memory_hole_size = 0;
+	for (i = 0; i < section_cnt; i++) {
+		if (fdm->section[i].src_type !=
+		    POWERNV_FADUMP_REAL_MODE_REGION)
+			continue;
+
+		/* rmr_src_addr[]/rmr_src_size[] hold MAX_REAL_MEM_REGIONS */
+		if (j >= MAX_REAL_MEM_REGIONS) {
+			pr_err("Too many real memory regions; ignoring the rest\n");
+			break;
+		}
+
+		base = be64_to_cpu(fdm->section[i].src_addr);
+		size = be64_to_cpu(fdm->section[i].src_size);
+		pr_debug("%d. RMR base: 0x%lx, size: 0x%lx\n",
+			 (i + 1), base, size);
+
+		fadump_conf->rmr_src_addr[j] = base;
+		fadump_conf->rmr_src_size[j] = size;
+		fadump_conf->rmr_source_len += size;
+
+		/* gap between the previous region and this one */
+		if (base > last_end) {
+			fadump_conf->boot_memory_hole_size +=
+				(base - last_end);
+		}
+
+		last_end = base + size;
+		j++;
+	}
+	fadump_conf->rmr_regions_cnt = j;
+	pr_debug("Real memory regions count: %lu\n",
+		 fadump_conf->rmr_regions_cnt);
+}
+
+/*
+ * Fill the file-level 'fdm' structure with one section per real memory
+ * region recorded in 'fadump_conf'.
+ *
+ * The regions are laid out back to back at the destination, starting at
+ * 'addr'. Afterwards 'fadump_conf' is refreshed from the new structure.
+ *
+ * Returns the address just past the last destination area.
+ */
+static ulong powernv_init_fadump_mem_struct(struct fw_dump *fadump_conf,
+					    ulong addr)
+{
+	int idx, nr_sections = 0;
+
+	fdm.section_size = cpu_to_be16(sizeof(struct powernv_fadump_section));
+
+	/* RMA region sections */
+	for (idx = 0; idx < fadump_conf->rmr_regions_cnt; idx++) {
+		struct powernv_fadump_section *sec =
+			&fdm.section[RMR_REGION_INPUT_IDX + idx];
+		ulong len = fadump_conf->rmr_src_size[idx];
+
+		sec->src_type = POWERNV_FADUMP_REAL_MODE_REGION;
+		sec->src_addr = cpu_to_be64(fadump_conf->rmr_src_addr[idx]);
+		sec->dest_addr = cpu_to_be64(addr);
+		/* source and destination sizes are always the same */
+		sec->dest_size = cpu_to_be64(len);
+		sec->src_size = sec->dest_size;
+
+		nr_sections++;
+		addr += len;
+	}
+
+	fdm.section_count = cpu_to_be16(nr_sections);
+	update_fadump_config(fadump_conf, &fdm);
+
+	return addr;
+}
+
+/*
+ * Register the file-level 'fdm' structure with firmware through
+ * opal_configure_fadump(FADUMP_REGISTER).
+ *
+ * Returns 0 on success, -EEXIST when firmware answers OPAL_PERMISSION
+ * (treated as "already registered"), and -EIO for every other result.
+ * dump_registered is set in the first two cases; on OPAL_UNSUPPORTED
+ * fadump is marked as neither supported nor enabled.
+ */
+static int powernv_register_fadump(struct fw_dump *fadump_conf)
+{
+	int rc, err = -EIO;
+
+	rc = opal_configure_fadump(FADUMP_REGISTER, &fdm, fdm_actual_size);
+	switch (rc) {
+	case OPAL_SUCCESS:
+		/* not an error: report success at info level */
+		pr_info("Registration is successful!\n");
+		fadump_conf->dump_registered = 1;
+		err = 0;
+		break;
+	case OPAL_PERMISSION:
+		pr_err("Already registered!\n");
+		fadump_conf->dump_registered = 1;
+		err = -EEXIST;
+		break;
+	case OPAL_UNSUPPORTED:
+		pr_err("Support not available.\n");
+		fadump_conf->fadump_supported = 0;
+		fadump_conf->fadump_enabled = 0;
+		break;
+	case OPAL_INTERNAL_ERROR:
+		pr_err("Failed to register. Hardware Error(%d).\n", rc);
+		break;
+	case OPAL_PARAMETER:
+		pr_err("Failed to register. Parameter Error(%d).\n", rc);
+		break;
+	default:
+		pr_err("Failed to register. Unknown Error(%d).\n", rc);
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * Ask firmware to drop the current registration.
+ *
+ * Returns 0 and clears dump_registered on success, -EIO if the OPAL call
+ * reports any error.
+ */
+static int powernv_unregister_fadump(struct fw_dump *fadump_conf)
+{
+	int status;
+
+	status = opal_configure_fadump(FADUMP_UNREGISTER, &fdm,
+				       fdm_actual_size);
+	if (status != 0) {
+		pr_err("Failed to un-register - unexpected Error(%d).\n",
+		       status);
+		return -EIO;
+	}
+
+	fadump_conf->dump_registered = 0;
+	return 0;
+}
+
+/* The preserved area starts at the real memory regions destination. */
+static ulong powernv_get_preserv_area_start(struct fw_dump *fadump_conf)
+{
+	ulong start = fadump_conf->rmr_destination_addr;
+
+	return start;
+}
+
+/* The meta area directly follows the copied real memory regions. */
+static ulong powernv_get_meta_area_start(struct fw_dump *fadump_conf)
+{
+	ulong dest = fadump_conf->rmr_destination_addr;
+	ulong len = fadump_conf->rmr_source_len;
+
+	return dest + len;
+}
+
+/*
+ * Tell firmware that the active dump is no longer needed.
+ *
+ * On success the dump is marked inactive, fdm_active is forgotten and 0 is
+ * returned; any OPAL error is logged and turned into -EIO.
+ */
+static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
+{
+	int status;
+
+	status = opal_configure_fadump(FADUMP_INVALIDATE, (void *)fdm_active,
+				       fdm_actual_size);
+	if (!status) {
+		fadump_conf->dump_active = 0;
+		fdm_active = NULL;
+		return 0;
+	}
+
+	pr_err("Failed to invalidate - unexpected Error(%d).\n", status);
+	return -EIO;
+}
+
+/*
+ * Build the ELF CPU notes for the crashing CPU.
+ *
+ * Unlike a parser of the firmware register save area, this function does
+ * not read any firmware-provided CPU state: it allocates a page-aligned
+ * notes buffer sized for a single CPU and, if the fadump crash info header
+ * is available and names a crashing CPU, converts the registers saved in
+ * that header into ELF notes and updates the elfcore header.
+ *
+ * Returns 0 on success (also when no crashing-CPU state is available) and
+ * -ENOMEM if the notes buffer cannot be allocated.
+ */
+static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+ /* only one CPU's notes are built here, see above */
+ u32 num_cpus = 1, *note_buf;
+ struct fadump_crash_info_header *fdh = NULL;
+
+ /* Allocate buffer to hold cpu crash notes. */
+ fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+ fadump_conf->cpu_notes_buf_size =
+ PAGE_ALIGN(fadump_conf->cpu_notes_buf_size);
+ note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size);
+ if (!note_buf) {
+ pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n",
+ fadump_conf->cpu_notes_buf_size);
+ return -ENOMEM;
+ }
+ /* the buffer is recorded by physical address */
+ fadump_conf->cpu_notes_buf = __pa(note_buf);
+
+ pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+ (num_cpus * sizeof(note_buf_t)), note_buf);
+
+ if (fadump_conf->fadumphdr_addr)
+ fdh = __va(fadump_conf->fadumphdr_addr);
+
+ /* use the register state saved in the crash info header, if any */
+ if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) {
+ note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+ final_note(note_buf);
+
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(fadump_conf,
+ __va(fdh->elfcorehdr_addr));
+ }
+
+ return 0;
+}
+
+/*
+ * Validate the dump left by the previous kernel and get it ready for
+ * export.
+ *
+ * Returns -EINVAL when no dump is active, when the crash info header
+ * address is unknown or when its magic number is wrong; otherwise the
+ * result of building the CPU notes. On success elfcorehdr_addr is set so
+ * that the elfcore header gets exported through '/proc/vmcore'.
+ */
+static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
+{
+	struct fadump_crash_info_header *crash_hdr;
+	int ret;
+
+	if (!fdm_active)
+		return -EINVAL;
+	if (!fadump_conf->fadumphdr_addr)
+		return -EINVAL;
+
+	/* Validate the fadump crash info header */
+	crash_hdr = __va(fadump_conf->fadumphdr_addr);
+	if (crash_hdr->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * TODO: To build cpu notes, find a way to map PIR to logical id.
+	 * Also, we may need different method for pseries and powernv.
+	 * The currently booted kernel could have a different PIR to
+	 * logical id mapping. So, try saving info of previous kernel's
+	 * paca to get the right PIR to logical id mapping.
+	 */
+	ret = fadump_build_cpu_notes(fadump_conf);
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * The dump info is validated and the elfcore header is ready:
+	 * publish its address for the vmcore module.
+	 */
+	elfcorehdr_addr = crash_hdr->elfcorehdr_addr;
+
+	return 0;
+}
+
+/*
+ * fadump_region_show callback for powernv.
+ * Intentionally empty in this patch: nothing is written to 'm'.
+ */
+static void powernv_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+}
+
+/*
+ * Trigger an MPIPL reboot through OPAL, passing 'msg' along.
+ * The function only returns if the reboot request fails; the two known
+ * failure codes are reported, any other result is ignored.
+ */
+static void powernv_crash_fadump(const char *msg)
+{
+	int rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+
+	switch (rc) {
+	case OPAL_UNSUPPORTED:
+		pr_emerg("Reboot type %d not supported\n", OPAL_REBOOT_MPIPL);
+		break;
+	case OPAL_HARDWARE:
+		pr_emerg("No backend support for MPIPL!\n");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * powernv implementation of the platform callbacks; installed into
+ * 'fadump_ops' by powernv_dt_scan_fadump().
+ */
+static struct fadump_ops_t powernv_fadump_ops = {
+ .init_fadump_mem_struct = powernv_init_fadump_mem_struct,
+ .register_fadump = powernv_register_fadump,
+ .unregister_fadump = powernv_unregister_fadump,
+ .get_preserv_area_start = powernv_get_preserv_area_start,
+ .get_meta_area_start = powernv_get_meta_area_start,
+ .invalidate_fadump = powernv_invalidate_fadump,
+ .process_fadump = powernv_process_fadump,
+ .fadump_region_show = powernv_fadump_region_show,
+ .crash_fadump = powernv_crash_fadump,
+};
+
+/*
+ * Probe a flattened device-tree node for firmware-assisted dump support
+ * on powernv.
+ *
+ * Sets the per-entry copy limit, picks up the "result-table" property if a
+ * dump from the previous boot is active, installs the powernv callbacks
+ * and marks fadump as supported. Returns 1 on every path.
+ */
+int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
+{
+	/*
+	 * Firmware currently supports only a 32-bit value for the size of
+	 * one entry; use the largest 1MB-aligned value that fits.
+	 */
+	fadump_conf->max_copy_size = _ALIGN_DOWN(0xFFFFFFFF, (1 << 20));
+
+	/* Was a dump initiated on the last reboot? */
+	fdm_active = of_get_flat_dt_prop(node, "result-table", NULL);
+	if (fdm_active != NULL) {
+		pr_info("Firmware-assisted dump is active.\n");
+		fadump_conf->dump_active = 1;
+		/*
+		 * NOTE(review): the structure is handed over by physical
+		 * address here, while invalidation passes fdm_active
+		 * itself - confirm that __pa() is intended.
+		 */
+		update_fadump_config(fadump_conf, (void *)__pa(fdm_active));
+	}
+
+	fadump_conf->fadump_platform = FADUMP_PLATFORM_POWERNV;
+	fadump_conf->fadump_supported = 1;
+	fadump_ops = &powernv_fadump_ops;
+
+	return 1;
+}
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.h b/arch/powerpc/platforms/powernv/powernv_fadump.h
new file mode 100644
index 0000000..224a142
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.h
@@ -0,0 +1,63 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_POWERNV_FA_DUMP_H__
+#define __PPC64_POWERNV_FA_DUMP_H__
+
+#define POWERNV_FADUMP_CPU_STATE_DATA 0x0000
+/* OPAL : 0x01 – 0x39 */
+#define POWERNV_FADUMP_OPAL_REGION 0x0001
+/* Firmware/SMF : 0x40 – 0x79 */
+#define POWERNV_FADUMP_FW_REGION 0x0040
+/* Kernel memory region : 0x80 – 0xb9 */
+#define POWERNV_FADUMP_REAL_MODE_REGION 0x0080
+/* Reserved for future use : 0xc0 – 0xff */
+#define POWERNV_FADUMP_RESERVED_REGION 0x00c0
+
+/*
+ * Section type indices, numbered from 0. POWERNV_SECTIONS is not a type of
+ * its own: it is the number of types listed before it and is used to
+ * compute POWERNV_MAX_SECTIONS.
+ */
+enum powernv_fadump_section_types {
+ CPU_STATE_TYPE = 0,
+ OPAL_REGION_TYPE,
+ FW_REGION_TYPE,
+ RMR_REGION_TYPE,
+ POWERNV_SECTIONS
+};
+
+/* Starting index of RMR region in dump sections while registering */
+#define RMR_REGION_INPUT_IDX 0
+
+#define POWERNV_MAX_SECTIONS (POWERNV_SECTIONS + \
+ MAX_REAL_MEM_REGIONS - 1)
+
+/* Kernel Dump section info */
+struct powernv_fadump_section {
+ u8 src_type;
+ /* src_type is compared against the POWERNV_FADUMP_* region codes */
+ u8 reserved[7];
+ /* Addresses and sizes below are big-endian; read with be64_to_cpu() */
+ __be64 src_addr;
+ __be64 src_size;
+ __be64 dest_addr;
+ __be64 dest_size;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through opal call.
+ *
+ * section[] has room for POWERNV_MAX_SECTIONS entries; section_count
+ * (big-endian) gives the number of entries that are in use.
+ */
+struct powernv_fadump_mem_struct {
+
+ /* NOTE(review): presumably sizeof(struct powernv_fadump_section) - confirm */
+ __be16 section_size; /*sizeof(struct fadump_section) */
+ __be16 section_count; /* number of sections */
+ __be32 reserved;
+
+ struct powernv_fadump_section section[POWERNV_MAX_SECTIONS];
+};
+
+#endif /* __PPC64_POWERNV_FA_DUMP_H__ */
diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.c b/arch/powerpc/platforms/pseries/pseries_fadump.c
index ac54501..ef7e59a 100644
--- a/arch/powerpc/platforms/pseries/pseries_fadump.c
+++ b/arch/powerpc/platforms/pseries/pseries_fadump.c
@@ -40,8 +40,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
be64_to_cpu(fdm->rmr_region.destination_address);
if (fadump_conf->dump_active) {
- fadump_conf->rmr_source_len =
- be64_to_cpu(fdm->rmr_region.source_len);
+ fadump_conf->rmr_src_addr[0] =
+ be64_to_cpu(fdm->rmr_region.source_address);
+ fadump_conf->rmr_src_size[0] = be64_to_cpu(fdm->rmr_region.source_len);
+ fadump_conf->rmr_regions_cnt = 1;
+ fadump_conf->rmr_source_len = fadump_conf->rmr_src_size[0];
+ fadump_conf->boot_memory_hole_size = 0;
}
}
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [RFC PATCH 4/5] powerpc/fadump: process architected register state data provided by firmware
2018-05-15 4:58 [RFC PATCH 0/5] Add FADump support on PowerNV platform Hari Bathini
` (2 preceding siblings ...)
2018-05-15 4:59 ` [RFC PATCH 3/5] powerpc/fadump: enable fadump support on powernv platform Hari Bathini
@ 2018-05-15 4:59 ` Hari Bathini
2018-05-15 4:59 ` [RFC PATCH 5/5] powerpc/powernv: export /proc/opaldump for analysing opal crashes Hari Bathini
4 siblings, 0 replies; 6+ messages in thread
From: Hari Bathini @ 2018-05-15 4:59 UTC (permalink / raw)
To: Ananth N Mavinakayanahalli, Michael Ellerman,
Mahesh J Salgaonkar, Vasant Hegde, linuxppc-dev, Stewart Smith
Cc: Hari Bathini
From: Hari Bathini <hbathini@linux.vnet.ibm.com>
Firmware provides architected register state data at the time of crash.
This data contains the PIR value. Store each logical CPU's PIR value so
that the data provided by f/w can be matched with the corresponding logical CPU.
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
arch/powerpc/kernel/fadump.c | 38 ++++++
arch/powerpc/kernel/fadump_internal.h | 12 ++
arch/powerpc/platforms/powernv/powernv_fadump.c | 146 +++++++++++++++++++++--
arch/powerpc/platforms/powernv/powernv_fadump.h | 13 ++
4 files changed, 195 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index a27e4af..8cafa2b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -421,6 +421,7 @@ static unsigned long get_fadump_area_size(void)
size += fw_dump.cpu_state_data_size;
size += fw_dump.hpte_region_size;
size += fw_dump.boot_memory_size;
+ size += fw_dump.backup_area_size;
size += sizeof(struct fadump_crash_info_header);
size += sizeof(struct elfhdr); /* ELF core header.*/
size += sizeof(struct elf_phdr); /* place holder for cpu notes */
@@ -985,6 +986,37 @@ static unsigned long init_fadump_header(unsigned long addr)
return addr;
}
+/*
+ * Store the PIR special-purpose register of the CPU this runs on into
+ * the unsigned long that 'val' points to.
+ */
+static inline void read_pir(void *val)
+{
+	unsigned long *pir = val;
+
+	*pir = mfspr(SPRN_PIR);
+}
+
+/*
+ * Fill the backup area with the PIR of every logical CPU id below
+ * nr_cpu_ids, so that the capture kernel can match the register data
+ * provided by firmware with a logical CPU.
+ *
+ * Returns the address just past the backup area, or 0 when the platform
+ * provides no get_backup_area_start() callback.
+ */
+static unsigned long fadump_populate_backup_area(void)
+{
+	struct fadump_backup_area *backup_info;
+	unsigned int i, size = sizeof(struct fadump_backup_area);
+	unsigned long addr;
+
+	/* Without the callback there is no backup area to populate */
+	if (!fadump_ops->get_backup_area_start)
+		return 0;
+
+	addr = fadump_ops->get_backup_area_start(&fw_dump);
+	backup_info = __va(addr);
+	addr += fw_dump.backup_area_size;
+
+	/* Entries that are never written below stay zero */
+	memset(backup_info, 0, size);
+	backup_info->size = size;
+	backup_info->nr_threads = nr_cpu_ids;
+	for (i = 0; i < nr_cpu_ids; i++) {
+		/* last argument (wait) is 1 so the value is stored before it is logged */
+		smp_call_function_single(i, read_pir,
+					 &(backup_info->thread_pir[i]), 1);
+		pr_debug("Logical CPU: %d, PIR: 0x%lx\n",
+			 i, backup_info->thread_pir[i]);
+	}
+
+	return addr;
+}
+
static int register_fadump(void)
{
unsigned long addr;
@@ -1313,9 +1345,13 @@ int __init setup_fadump(void)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
- else if (fw_dump.reserve_dump_area_size)
+ else if (fw_dump.reserve_dump_area_size) {
fadump_ops->init_fadump_mem_struct(&fw_dump,
fw_dump.reserve_dump_area_start);
+ /* TODO: Extend this to pseries too */
+ if (fw_dump.fadump_platform == FADUMP_PLATFORM_POWERNV)
+ fadump_populate_backup_area();
+ }
fadump_init_files();
return 1;
diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h
index eae4b55..f391405 100644
--- a/arch/powerpc/kernel/fadump_internal.h
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -101,9 +101,20 @@ struct fadump_memory_range {
/* Maximum no. of real memory regions supported by the kernel */
#define MAX_REAL_MEM_REGIONS 6
+/* Backup area populated with data for processing in capture kernel */
+struct fadump_backup_area {
+ unsigned int size; /* set to sizeof(struct fadump_backup_area) when populated */
+ unsigned int nr_threads; /* set to nr_cpu_ids when populated */
+ unsigned long thread_pir[NR_CPUS]; /* PIR value read on each logical CPU, indexed by CPU id */
+};
+
/* Firmware-assisted dump configuration details. */
struct fw_dump {
+ unsigned long cpu_state_destination_addr;
+ unsigned long cpu_state_data_version;
+ unsigned long cpu_state_entry_size;
unsigned long cpu_state_data_size;
+ unsigned long backup_area_size;
unsigned long hpte_region_size;
unsigned long boot_memory_size;
unsigned long reserve_dump_area_start;
@@ -145,6 +156,7 @@ struct fadump_ops_t {
int (*unregister_fadump)(struct fw_dump *fadump_config);
ulong (*get_preserv_area_start)(struct fw_dump *fadump_conf);
ulong (*get_meta_area_start)(struct fw_dump *fadump_conf);
+ ulong (*get_backup_area_start)(struct fw_dump *fadump_conf);
int (*invalidate_fadump)(struct fw_dump *fadump_config);
int (*process_fadump)(struct fw_dump *fadump_config);
void (*fadump_region_show)(struct fw_dump *fadump_config,
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c b/arch/powerpc/platforms/powernv/powernv_fadump.c
index 6d4b515..36f0360 100644
--- a/arch/powerpc/platforms/powernv/powernv_fadump.c
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.c
@@ -39,6 +39,8 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
int unused_sections = (POWERNV_MAX_SECTIONS - section_cnt);
int i, j;
+ fadump_conf->backup_area_size = sizeof(struct fadump_backup_area);
+
pr_debug("section_cnt: %d\n", section_cnt);
WARN_ON(unused_sections < 0);
fdm_actual_size = sizeof(*fdm) -
@@ -84,6 +86,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
last_end = base + size;
j++;
+ } else if (fdm->section[i].src_type ==
+ POWERNV_FADUMP_CPU_STATE_DATA) {
+ fadump_conf->cpu_state_destination_addr =
+ be64_to_cpu(fdm->section[i].dest_addr);
+ fadump_conf->cpu_state_data_size =
+ be64_to_cpu(fdm->section[i].dest_size);
}
}
fadump_conf->rmr_regions_cnt = j;
@@ -178,6 +186,13 @@ static ulong powernv_get_preserv_area_start(struct fw_dump *fadump_conf)
static ulong powernv_get_meta_area_start(struct fw_dump *fadump_conf)
{
return (fadump_conf->rmr_destination_addr +
+ fadump_conf->rmr_source_len +
+ fadump_conf->backup_area_size);
+}
+
+static ulong powernv_get_backup_area_start(struct fw_dump *fadump_conf)
+{
+ return (fadump_conf->rmr_destination_addr +
fadump_conf->rmr_source_len);
}
@@ -197,6 +212,38 @@ static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
return 0;
}
+/*
+ * Look up 'pir' among the first ba->nr_threads entries of ba->thread_pir[]
+ * and return the index of the first match, or CPU_UNKNOWN if none matches.
+ */
+static inline int fadump_get_logical_cpu(struct fadump_backup_area *ba, u32 pir)
+{
+	int idx;
+
+	for (idx = 0; idx < ba->nr_threads; idx++) {
+		if (ba->thread_pir[idx] == pir)
+			return idx;
+	}
+
+	return CPU_UNKNOWN;
+}
+
+/*
+ * Zero *regs, then pass regs_per_thread consecutive register entries,
+ * starting at reg_entry, to fadump_set_regval() after converting each
+ * id/value pair from big-endian. Returns the entry that follows the last
+ * one consumed.
+ */
+static struct fadump_reg_entry*
+fadump_read_registers(unsigned int regs_per_thread,
+ struct fadump_reg_entry *reg_entry,
+ struct pt_regs *regs)
+{
+ int i;
+
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ for (i = 0; i < regs_per_thread; i++) {
+ fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+ be64_to_cpu(reg_entry->reg_value));
+ reg_entry++;
+ }
+ return reg_entry;
+}
+
/*
* Read CPU state dump data and convert it into ELF notes.
* The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
@@ -213,8 +260,34 @@ static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
*/
static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
{
- u32 num_cpus = 1, *note_buf;
+ struct powernv_thread_hdr *thdr;
+ struct fadump_reg_entry *reg_entry;
struct fadump_crash_info_header *fdh = NULL;
+ struct fadump_backup_area *backup_info = NULL;
+ char *bufp, *note_bufp;
+ u32 thread_pir;
+ unsigned long addr;
+ u32 num_cpus, *note_buf;
+ struct pt_regs regs;
+ int i, rc = 0, cpu = 0;
+ unsigned int size_of_each_thread, regs_per_thread;
+
+ addr = powernv_get_backup_area_start(fadump_conf);
+ if (!addr) {
+ pr_err("Unable to read CPU state data\n");
+ return -ENOENT;
+ }
+
+ backup_info = __va(addr);
+ num_cpus = backup_info->nr_threads;
+
+ size_of_each_thread = fadump_conf->cpu_state_entry_size;
+ regs_per_thread =
+ ((size_of_each_thread - CPU_REG_ENTRY_OFFSET) /
+ sizeof(struct fadump_reg_entry));
+
+ addr = fadump_conf->cpu_state_destination_addr;
+ bufp = __va(addr);
/* Allocate buffer to hold cpu crash notes. */
fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
@@ -234,10 +307,41 @@ static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
if (fadump_conf->fadumphdr_addr)
fdh = __va(fadump_conf->fadumphdr_addr);
- if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) {
- note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
- final_note(note_buf);
+ pr_debug("--------CPU State Data------------\n");
+ num_cpus = fadump_conf->cpu_state_data_size / size_of_each_thread;
+ pr_debug("NumCpus : %u\n", num_cpus);
+
+ note_bufp = (char *)note_buf;
+ for (i = 0; i < num_cpus; i++, bufp += size_of_each_thread) {
+ thdr = (struct powernv_thread_hdr *)bufp;
+ thread_pir = be32_to_cpu(thdr->pir);
+ cpu = fadump_get_logical_cpu(backup_info, thread_pir);
+ if (cpu == CPU_UNKNOWN) {
+ pr_err("Unable to read CPU state data");
+ rc = -ENOENT;
+ goto error_out;
+ }
+
+ if (fdh) {
+ if (!cpumask_test_cpu(cpu, &fdh->online_mask))
+ continue;
+
+ if (fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf,
+ &regs);
+ continue;
+ }
+ }
+
+ reg_entry = (struct fadump_reg_entry *)(bufp +
+ CPU_REG_ENTRY_OFFSET);
+ fadump_read_registers(regs_per_thread, reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ final_note(note_buf);
+ if (fdh) {
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
fdh->elfcorehdr_addr);
fadump_update_elfcore_header(fadump_conf,
@@ -245,6 +349,13 @@ static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
}
return 0;
+
+error_out:
+ fadump_cpu_notes_buf_free((ulong)__va(fadump_conf->cpu_notes_buf),
+ fadump_conf->cpu_notes_buf_size);
+ fadump_conf->cpu_notes_buf = 0;
+ fadump_conf->cpu_notes_buf_size = 0;
+ return rc;
}
static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
@@ -262,13 +373,6 @@ static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
return -EINVAL;
}
- /*
- * TODO: To build cpu notes, find a way to map PIR to logical id.
- * Also, we may need different method for pseries and powernv.
- * The currently booted kernel could have a different PIR to
- * logical id mapping. So, try saving info of previous kernel's
- * paca to get the right PIR to logical id mapping.
- */
rc = fadump_build_cpu_notes(fadump_conf);
if (rc)
return rc;
@@ -305,6 +409,7 @@ static struct fadump_ops_t powernv_fadump_ops = {
.unregister_fadump = powernv_unregister_fadump,
.get_preserv_area_start = powernv_get_preserv_area_start,
.get_meta_area_start = powernv_get_meta_area_start,
+ .get_backup_area_start = powernv_get_backup_area_start,
.invalidate_fadump = powernv_invalidate_fadump,
.process_fadump = powernv_process_fadump,
.fadump_region_show = powernv_fadump_region_show,
@@ -313,6 +418,15 @@ static struct fadump_ops_t powernv_fadump_ops = {
int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
{
+ const __be32 *prop;
+
+ prop = of_get_flat_dt_prop(node, "cpu-data-version", NULL);
+ if (prop)
+ fadump_conf->cpu_state_data_version = of_read_number(prop, 1);
+
+ if (fadump_conf->cpu_state_data_version != CPU_STATE_DATA_VERSION)
+ return 1;
+
/*
* Firmware currently supports only 32-bit value for size,
* align it to 1MB size.
@@ -327,6 +441,16 @@ int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
pr_info("Firmware-assisted dump is active.\n");
fadump_conf->dump_active = 1;
update_fadump_config(fadump_conf, (void *)__pa(fdm_active));
+
+ /*
+ * Doesn't need to populate these fields while registering dump
+ * as destination address and size are provided by F/W.
+ */
+ prop = of_get_flat_dt_prop(node, "cpu-data-size", NULL);
+ if (prop) {
+ fadump_conf->cpu_state_entry_size =
+ of_read_number(prop, 1);
+ }
}
fadump_ops = &powernv_fadump_ops;
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.h b/arch/powerpc/platforms/powernv/powernv_fadump.h
index 224a142..33be534 100644
--- a/arch/powerpc/platforms/powernv/powernv_fadump.h
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.h
@@ -13,6 +13,9 @@
#ifndef __PPC64_POWERNV_FA_DUMP_H__
#define __PPC64_POWERNV_FA_DUMP_H__
+#define CPU_STATE_DATA_VERSION 16
+#define CPU_REG_ENTRY_OFFSET 16
+
#define POWERNV_FADUMP_CPU_STATE_DATA 0x0000
/* OPAL : 0x01 – 0x39 */
#define POWERNV_FADUMP_OPAL_REGION 0x0001
@@ -37,6 +40,12 @@ enum powernv_fadump_section_types {
#define POWERNV_MAX_SECTIONS (POWERNV_SECTIONS + \
MAX_REAL_MEM_REGIONS - 1)
+/*
+ * Header at the start of each thread's block in the CPU state data; the
+ * register entries of the thread are read from CPU_REG_ENTRY_OFFSET bytes
+ * into the block. pir is big-endian (read with be32_to_cpu()).
+ */
+struct powernv_thread_hdr {
+ __be32 pir;
+ u8 core_state;
+ u8 reserved[11];
+} __attribute__ ((packed));
+
/* Kernel Dump section info */
struct powernv_fadump_section {
u8 src_type;
@@ -45,7 +54,7 @@ struct powernv_fadump_section {
__be64 src_size;
__be64 dest_addr;
__be64 dest_size;
-};
+} __attribute__ ((packed));
/*
* Firmware Assisted dump memory structure. This structure is required for
@@ -58,6 +67,6 @@ struct powernv_fadump_mem_struct {
__be32 reserved;
struct powernv_fadump_section section[POWERNV_MAX_SECTIONS];
-};
+} __attribute__ ((packed));
#endif /* __PPC64_POWERNV_FA_DUMP_H__ */
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [RFC PATCH 5/5] powerpc/powernv: export /proc/opaldump for analysing opal crashes
2018-05-15 4:58 [RFC PATCH 0/5] Add FADump support on PowerNV platform Hari Bathini
` (3 preceding siblings ...)
2018-05-15 4:59 ` [RFC PATCH 4/5] powerpc/fadump: process architected register state data provided by firmware Hari Bathini
@ 2018-05-15 4:59 ` Hari Bathini
4 siblings, 0 replies; 6+ messages in thread
From: Hari Bathini @ 2018-05-15 4:59 UTC (permalink / raw)
To: Ananth N Mavinakayanahalli, Michael Ellerman,
Mahesh J Salgaonkar, Vasant Hegde, linuxppc-dev, Stewart Smith
Cc: Hari Bathini
From: Hari Bathini <hbathini@linux.vnet.ibm.com>
Export /proc/opaldump file to analyze opal crashes
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
arch/powerpc/platforms/powernv/Makefile | 2
arch/powerpc/platforms/powernv/opalcore.c | 282 +++++++++++++++++++++++
arch/powerpc/platforms/powernv/opalcore.h | 28 ++
arch/powerpc/platforms/powernv/powernv_fadump.c | 52 ++++
4 files changed, 358 insertions(+), 6 deletions(-)
create mode 100644 arch/powerpc/platforms/powernv/opalcore.c
create mode 100644 arch/powerpc/platforms/powernv/opalcore.h
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 0d106b5..31f828f 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_FA_DUMP) += powernv_fadump.o
+obj-$(CONFIG_FA_DUMP) += powernv_fadump.o opalcore.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/opalcore.c b/arch/powerpc/platforms/powernv/opalcore.c
new file mode 100644
index 0000000..d233d9a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opalcore.c
@@ -0,0 +1,282 @@
+/*
+ * Interface for exporting the OPAL ELF core.
+ * Loosely based on fs/proc/vmcore.c
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/crash_core.h>
+
+#include <asm/page.h>
+
+#include "opalcore.h"
+
+/* Total size of opalcore file. */
+static size_t opalcore_size;
+
+/* This buffer includes all the ELF core headers and the PT_NOTE */
+static char *opalcorebuf;
+static size_t opalcorebuf_sz;
+
+/* Pointer to the first PT_LOAD in the ELF file */
+Elf64_Phdr *ptload_phdr;
+unsigned int ptload_cnt;
+
+static struct proc_dir_entry *proc_opalcore;
+
+/* Returns 1 when the opalcore buffer pointer is set, 0 when it is NULL. */
+static inline int is_opalcore_usable(void)
+{
+	if (opalcorebuf == NULL)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Append one ELF note at 'buf': the note header (fields stored
+ * big-endian), then 'name' including its terminating NUL, then 'data_len'
+ * bytes of 'data'. Each of the three parts is rounded up to a multiple of
+ * sizeof(Elf64_Word).
+ *
+ * Returns the position right after the note, where the next one can go.
+ */
+static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name,
+				     unsigned int type, void *data,
+				     size_t data_len)
+{
+	Elf64_Nhdr *note = (Elf64_Nhdr *)buf;
+	Elf64_Word namesz = strlen(name) + 1;
+
+	note->n_namesz = cpu_to_be32(namesz);
+	note->n_descsz = cpu_to_be32(data_len);
+	note->n_type   = cpu_to_be32(type);
+	buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf64_Word));
+	memcpy(buf, name, namesz);
+	/*
+	 * Advance by the host-order length: note->n_namesz holds the
+	 * big-endian encoding and is byte-swapped on a little-endian kernel.
+	 */
+	buf += DIV_ROUND_UP(namesz, sizeof(Elf64_Word));
+	memcpy(buf, data, data_len);
+	buf += DIV_ROUND_UP(data_len, sizeof(Elf64_Word));
+
+	return buf;
+}
+
+/* Terminate the note list by writing an all-zero note header at 'buf'. */
+static void final_elf64_note(Elf64_Word *buf)
+{
+	Elf64_Nhdr *terminator = (Elf64_Nhdr *)buf;
+
+	memset(terminator, 0, sizeof(*terminator));
+}
+
+/*
+ * Wrap 'regs' in a zero-initialised elf_prstatus and append it at 'buf' as
+ * an NT_PRSTATUS note named CRASH_CORE_NOTE_NAME. Returns the position
+ * right after the appended note.
+ */
+static Elf64_Word *regs_to_elf64_notes(Elf64_Word *buf, struct pt_regs *regs)
+{
+	struct elf_prstatus status;
+
+	memset(&status, 0, sizeof(status));
+	elf_core_copy_kernel_regs(&status.pr_reg, regs);
+
+	return append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+				 &status, sizeof(status));
+}
+
+/*
+ * read() handler for the opalcore file.
+ *
+ * The part of the request that lies inside the header buffer (ELF headers
+ * plus PT_NOTE data) is copied from opalcorebuf; the rest is copied from
+ * the memory described by the PT_LOAD program headers.
+ * Returns number of bytes read on success, -errno on failure.
+ */
+static ssize_t read_opalcore(struct file *file, char __user *buffer,
+			     size_t buflen, loff_t *fpos)
+{
+	Elf64_Phdr *seg = ptload_phdr;
+	ssize_t chunk, copied = 0;
+	int idx;
+
+	if (buflen == 0 || *fpos >= opalcore_size)
+		return 0;
+
+	/* Serve the ELF core header and/or PT_NOTE part first */
+	if (*fpos < opalcorebuf_sz) {
+		chunk = min(opalcorebuf_sz - (size_t)*fpos, buflen);
+		if (copy_to_user(buffer, opalcorebuf + *fpos, chunk))
+			return -EFAULT;
+		buffer += chunk;
+		buflen -= chunk;
+		copied += chunk;
+		*fpos += chunk;
+
+		/* the caller's buffer is full already */
+		if (buflen == 0)
+			return copied;
+	}
+
+	if ((seg == NULL) || (ptload_cnt == 0))
+		return -EFAULT;
+
+	for (idx = 0; idx < ptload_cnt; idx++, seg++) {
+		uint64_t seg_off = be64_to_cpu(seg->p_offset);
+		uint64_t seg_end = seg_off + be64_to_cpu(seg->p_memsz);
+		void *src;
+
+		/* file position is already past this segment */
+		if (*fpos >= seg_end)
+			continue;
+
+		chunk = (size_t)min_t(unsigned long long,
+				      (seg_end - *fpos),
+				      buflen);
+		/* translate the file offset into an address inside the segment */
+		src = (void *)(be64_to_cpu(seg->p_vaddr) + *fpos - seg_off);
+		if (copy_to_user(buffer, src, chunk))
+			return -EFAULT;
+		buffer += chunk;
+		buflen -= chunk;
+		copied += chunk;
+		*fpos += chunk;
+
+		/* the caller's buffer is full already */
+		if (buflen == 0)
+			return copied;
+	}
+
+	return copied;
+}
+
+/* File operations of the "opalcore" proc entry: only read is set. */
+static const struct file_operations proc_opalcore_operations = {
+ .read = read_opalcore,
+};
+
+/*
+ * Build the in-memory front part of the opalcore file: the ELF header, one
+ * PT_NOTE and oc_conf->ptload_cnt PT_LOAD program headers, followed by one
+ * NT_PRSTATUS note per thread. All ELF fields are stored big-endian, in
+ * line with the ELFDATA2MSB identification.
+ *
+ * Returns 0 on success, -EINVAL if a buffer already exists or ptload_cnt
+ * is 0 or larger than MAX_PT_LOAD_CNT, and -ENOMEM if the buffer cannot be
+ * allocated.
+ *
+ * NOTE(review): assumes the notes written at the end fit in
+ * oc_conf->cpu_notes_buf_size - confirm against the caller.
+ */
+int __init create_opalcore(struct opalcore_config *oc_conf)
+{
+	unsigned long hdr_size, order, count, paddr, i;
+	Elf64_Ehdr *elf;
+	Elf64_Phdr *phdr;
+	loff_t opalcore_off;
+	struct page *page;
+	char *bufp;
+
+	if (opalcorebuf || (oc_conf->ptload_cnt == 0) ||
+	    (oc_conf->ptload_cnt > MAX_PT_LOAD_CNT))
+		return -EINVAL;
+
+	/* ELF header + (1 PT_NOTE + ptload_cnt PT_LOAD) program headers */
+	hdr_size = (sizeof(Elf64_Ehdr) +
+		    ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr)));
+	opalcorebuf_sz = (hdr_size + oc_conf->cpu_notes_buf_size);
+	order = get_order(opalcorebuf_sz);
+	opalcorebuf = (char *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+	if (!opalcorebuf)
+		return -ENOMEM;
+
+	count = 1 << order;
+	page = virt_to_page(opalcorebuf);
+	for (i = 0; i < count; i++)
+		SetPageReserved(page + i);
+
+	/* Use count to keep track of the program headers */
+	count = 0;
+
+	bufp = opalcorebuf;
+	elf = (Elf64_Ehdr *)bufp;
+	bufp += sizeof(Elf64_Ehdr);
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELFDATA2MSB;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	elf->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+	elf->e_type = cpu_to_be16(ET_CORE);
+	elf->e_machine = cpu_to_be16(ELF_ARCH);
+	elf->e_version = cpu_to_be32(EV_CURRENT);
+	elf->e_entry = 0;
+	elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr));
+	elf->e_shoff = 0;
+	elf->e_flags = 0;
+
+	elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr));
+	elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr));
+	elf->e_phnum = 0;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+
+	/* PT_NOTE: its data starts right after all the headers */
+	phdr = (Elf64_Phdr *)bufp;
+	bufp += sizeof(Elf64_Phdr);
+	phdr->p_type = cpu_to_be32(PT_NOTE);
+	phdr->p_flags = 0;
+	phdr->p_align = 0;
+	phdr->p_paddr = phdr->p_vaddr = 0;
+	phdr->p_offset = cpu_to_be64(hdr_size);
+	phdr->p_filesz = phdr->p_memsz =
+		cpu_to_be64(oc_conf->cpu_notes_buf_size);
+	count++;
+
+	/* PT_LOAD data follows the header buffer in file-offset terms */
+	opalcore_off = opalcorebuf_sz;
+	ptload_phdr = (Elf64_Phdr *)bufp;
+	ptload_cnt = oc_conf->ptload_cnt;
+	paddr = 0;
+	for (i = 0; i < ptload_cnt; i++) {
+		phdr = (Elf64_Phdr *)bufp;
+		bufp += sizeof(Elf64_Phdr);
+		phdr->p_type = cpu_to_be32(PT_LOAD);
+		phdr->p_flags = cpu_to_be32(PF_R|PF_W|PF_X);
+		phdr->p_align = 0;
+		/* stored big-endian like every other field of the header */
+		phdr->p_paddr = cpu_to_be64(paddr);
+		phdr->p_vaddr =
+			cpu_to_be64((Elf64_Addr)__va(oc_conf->ptload_addr[i]));
+		phdr->p_filesz = phdr->p_memsz =
+			cpu_to_be64(oc_conf->ptload_size[i]);
+		phdr->p_offset = cpu_to_be64(opalcore_off);
+
+		count++;
+		opalcore_off += oc_conf->ptload_size[i];
+		paddr += oc_conf->ptload_size[i];
+	}
+
+	elf->e_phnum = cpu_to_be16(count);
+
+	/* bufp now points just past the program headers: the note area */
+	for (i = 0; i < oc_conf->nr_threads; i++) {
+		bufp = (char *)regs_to_elf64_notes((Elf64_Word *)bufp,
+						   &(oc_conf->regs[i]));
+	}
+	final_elf64_note((Elf64_Word *)bufp);
+
+	opalcore_size = opalcore_off;
+	return 0;
+}
+
+/*
+ * Init function for opalcore module: create the "opalcore" proc entry if
+ * an opalcore buffer is available, and set the entry's size to the total
+ * size of the opalcore file. Always returns 0.
+ */
+static int __init opalcore_init(void)
+{
+	/* No buffer available: nothing to export */
+	if (!is_opalcore_usable())
+		return 0;
+
+	proc_opalcore = proc_create("opalcore", 0400, NULL,
+				    &proc_opalcore_operations);
+	if (proc_opalcore != NULL)
+		proc_set_size(proc_opalcore, opalcore_size);
+
+	return 0;
+}
+fs_initcall(opalcore_init);
+
+/*
+ * Cleanup function for opalcore module: remove the proc entry, drop the
+ * PT_LOAD bookkeeping and release the core buffer.
+ *
+ * Does nothing to the page allocator when no buffer exists, so it can be
+ * called when create_opalcore() never ran or failed, and more than once.
+ */
+void opalcore_cleanup(void)
+{
+	unsigned long order, count, i;
+	struct page *page;
+
+	if (proc_opalcore) {
+		proc_remove(proc_opalcore);
+		proc_opalcore = NULL;
+	}
+
+	ptload_phdr = NULL;
+	ptload_cnt = 0;
+
+	/* No buffer was allocated, or it has been freed already */
+	if (!opalcorebuf)
+		return;
+
+	/* free core buffer */
+	order = get_order(opalcorebuf_sz);
+	count = 1 << order;
+	page = virt_to_page(opalcorebuf);
+	for (i = 0; i < count; i++)
+		ClearPageReserved(page + i);
+	__free_pages(page, order);
+
+	/* Forget the freed buffer so that no stale pointer is left behind */
+	opalcorebuf = NULL;
+	opalcorebuf_sz = 0;
+	opalcore_size = 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opalcore.h b/arch/powerpc/platforms/powernv/opalcore.h
new file mode 100644
index 0000000..b791a54
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opalcore.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _OPALCORE_H
+#define _OPALCORE_H
+
+#define MAX_PT_LOAD_CNT 16
+
+/* Inputs that create_opalcore() uses to lay out the opalcore ELF file. */
+struct opalcore_config {
+ unsigned long cpu_notes_buf_size; /* size given to the PT_NOTE segment */
+ unsigned long ptload_cnt; /* entries used in ptload_addr[]/ptload_size[] */
+ unsigned long ptload_addr[MAX_PT_LOAD_CNT]; /* address of each PT_LOAD region; passed through __va() */
+ unsigned long ptload_size[MAX_PT_LOAD_CNT]; /* size of each PT_LOAD region */
+ unsigned int nr_threads; /* create_opalcore() emits notes for regs[0..nr_threads-1] */
+ struct pt_regs regs[NR_CPUS]; /* register state converted into NT_PRSTATUS notes */
+};
+
+extern int create_opalcore(struct opalcore_config *opalcore_config);
+extern void opalcore_cleanup(void);
+
+#endif /* _OPALCORE_H */
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c b/arch/powerpc/platforms/powernv/powernv_fadump.c
index 36f0360..fd95bbb 100644
--- a/arch/powerpc/platforms/powernv/powernv_fadump.c
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.c
@@ -26,8 +26,10 @@
#include "../../kernel/fadump_internal.h"
#include "powernv_fadump.h"
+#include "opalcore.h"
static struct powernv_fadump_mem_struct fdm;
+static struct opalcore_config oc_config;
static const struct powernv_fadump_mem_struct *fdm_active;
unsigned long fdm_actual_size;
@@ -200,6 +202,8 @@ static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
{
int rc;
+ opalcore_cleanup();
+
rc = opal_configure_fadump(FADUMP_INVALIDATE, (void *)fdm_active,
fdm_actual_size);
if (rc) {
@@ -230,15 +234,19 @@ static inline int fadump_get_logical_cpu(struct fadump_backup_area *ba, u32 pir)
static struct fadump_reg_entry*
fadump_read_registers(unsigned int regs_per_thread,
struct fadump_reg_entry *reg_entry,
- struct pt_regs *regs)
+ struct pt_regs *regs, bool opal_data)
{
int i;
+ u64 reg_value;
memset(regs, 0, sizeof(struct pt_regs));
for (i = 0; i < regs_per_thread; i++) {
+ reg_value = (opal_data ? reg_entry->reg_value :
+ be64_to_cpu(reg_entry->reg_value));
+
fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
+ reg_value);
reg_entry++;
}
return reg_entry;
@@ -330,16 +338,23 @@ static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
regs = fdh->regs;
note_buf = fadump_regs_to_elf_notes(note_buf,
&regs);
+ fadump_read_registers(regs_per_thread,
+ reg_entry,
+ &oc_config.regs[cpu],
+ true);
continue;
}
}
reg_entry = (struct fadump_reg_entry *)(bufp +
CPU_REG_ENTRY_OFFSET);
- fadump_read_registers(regs_per_thread, reg_entry, &regs);
+ fadump_read_registers(regs_per_thread, reg_entry, &regs, false);
note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ fadump_read_registers(regs_per_thread, reg_entry,
+ &oc_config.regs[cpu], true);
}
final_note(note_buf);
+ oc_config.nr_threads = num_cpus;
if (fdh) {
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
@@ -361,7 +376,7 @@ static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
{
struct fadump_crash_info_header *fdh;
- int rc = 0;
+ int i, rc = 0;
if (!fdm_active || !fadump_conf->fadumphdr_addr)
return -EINVAL;
@@ -384,7 +399,34 @@ static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
*/
elfcorehdr_addr = fdh->elfcorehdr_addr;
- return rc;
+ /*
+ * pt_regs for opalcore are populated while build cpu notes
+ * for vmcore. Populate other config info to facilitate
+ * exporting /proc/opalcore file.
+ */
+ oc_config.cpu_notes_buf_size = fadump_conf->cpu_notes_buf_size;
+ oc_config.ptload_cnt = 0;
+ for(i = 0; i < be16_to_cpu(fdm_active->section_count); i++) {
+ u8 src_type = fdm_active->section[i].src_type;
+
+ if ((src_type < POWERNV_FADUMP_OPAL_REGION) ||
+ (src_type >= POWERNV_FADUMP_FW_REGION))
+ continue;
+
+ if (oc_config.ptload_cnt >= MAX_PT_LOAD_CNT)
+ break;
+
+ oc_config.ptload_addr[oc_config.ptload_cnt] =
+ be64_to_cpu(fdm_active->section[i].dest_addr);
+ oc_config.ptload_size[oc_config.ptload_cnt++] =
+ be64_to_cpu(fdm_active->section[i].dest_size);
+ }
+
+ rc = create_opalcore(&oc_config);
+ if (rc)
+ pr_warn("Could not create opalcore ELF file\n");
+
+ return 0;
}
static void powernv_fadump_region_show(struct fw_dump *fadump_conf,
^ permalink raw reply related [flat|nested] 6+ messages in thread