LinuxPPC-Dev Archive on lore.kernel.org
 help / color / Atom feed
From: Hari Bathini <hbathini@linux.ibm.com>
To: linuxppc-dev <linuxppc-dev@ozlabs.org>
Cc: Ananth N Mavinakayanahalli <ananth@linux.ibm.com>,
	Mahesh J Salgaonkar <mahesh@linux.ibm.com>,
	Vasant Hegde <hegdevasant@linux.ibm.com>,
	Oliver <oohall@gmail.com>, Nicholas Piggin <npiggin@gmail.com>,
	Daniel Axtens <dja@axtens.net>
Subject: [PATCH v5 21/31] powernv/fadump: process architected register state data provided by firmware
Date: Tue, 20 Aug 2019 17:36:42 +0530
Message-ID: <156630280239.8896.11769233860624935762.stgit@hbathini.in.ibm.com> (raw)
In-Reply-To: <156630261682.8896.3418665808003586786.stgit@hbathini.in.ibm.com>

From: Hari Bathini <hbathini@linux.vnet.ibm.com>

Firmware provides architected register state data at the time of crash.
Process this data and build CPU notes to append to ELF core.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump-common.h          |    4 +
 arch/powerpc/platforms/powernv/opal-fadump.c |  198 ++++++++++++++++++++++++--
 arch/powerpc/platforms/powernv/opal-fadump.h |   39 +++++
 3 files changed, 229 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/fadump-common.h b/arch/powerpc/kernel/fadump-common.h
index 7107cf2..fc408b0 100644
--- a/arch/powerpc/kernel/fadump-common.h
+++ b/arch/powerpc/kernel/fadump-common.h
@@ -98,7 +98,11 @@ struct fw_dump {
 	/* cmd line option during boot */
 	unsigned long	reserve_bootvar;
 
+	unsigned long	cpu_state_destination_addr;
+	unsigned long	cpu_state_data_version;
+	unsigned long	cpu_state_entry_size;
 	unsigned long	cpu_state_data_size;
+
 	unsigned long	hpte_region_size;
 
 	unsigned long	boot_mem_dest_addr;
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
index f75b861..9a32a7f 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -23,6 +23,7 @@
 #include "opal-fadump.h"
 
 static const struct opal_fadump_mem_struct *opal_fdm_active;
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
 static struct opal_fadump_mem_struct *opal_fdm;
 
 static int opal_fadump_unregister(struct fw_dump *fadump_conf);
@@ -282,15 +283,122 @@ static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
 		pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
 }
 
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+						 u32 reg_type, u32 reg_num,
+						 u64 reg_val)
+{
+	if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) {
+		if (reg_num < 32)
+			regs->gpr[reg_num] = reg_val;
+		return;
+	}
+
+	switch (reg_num) {
+	case SPRN_CTR:
+		regs->ctr = reg_val;
+		break;
+	case SPRN_LR:
+		regs->link = reg_val;
+		break;
+	case SPRN_XER:
+		regs->xer = reg_val;
+		break;
+	case SPRN_DAR:
+		regs->dar = reg_val;
+		break;
+	case SPRN_DSISR:
+		regs->dsisr = reg_val;
+		break;
+	case HDAT_FADUMP_REG_ID_NIP:
+		regs->nip = reg_val;
+		break;
+	case HDAT_FADUMP_REG_ID_MSR:
+		regs->msr = reg_val;
+		break;
+	case HDAT_FADUMP_REG_ID_CCR:
+		regs->ccr = reg_val;
+		break;
+	}
+}
+
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+					 unsigned int reg_entry_size,
+					 struct pt_regs *regs)
+{
+	int i;
+	struct hdat_fadump_reg_entry *reg_entry;
+
+	memset(regs, 0, sizeof(struct pt_regs));
+
+	for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+		reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+		opal_fadump_set_regval_regnum(regs,
+					      be32_to_cpu(reg_entry->reg_type),
+					      be32_to_cpu(reg_entry->reg_num),
+					      be64_to_cpu(reg_entry->reg_val));
+	}
+}
+
+static inline bool __init is_thread_core_inactive(u8 core_state)
+{
+	bool is_inactive = false;
+
+	if (core_state == HDAT_FADUMP_CORE_INACTIVE)
+		is_inactive = true;
+
+	return is_inactive;
+}
+
 /*
  * Convert CPU state data saved at the time of crash into ELF notes.
+ *
+ * Each register entry is of 16 bytes, A numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation.
  */
 static int __init opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
 {
 	u32 num_cpus, *note_buf;
 	struct fadump_crash_info_header *fdh = NULL;
+	struct hdat_fadump_thread_hdr *thdr;
+	unsigned long addr;
+	u32 thread_pir;
+	char *bufp;
+	struct pt_regs regs;
+	unsigned int size_of_each_thread;
+	unsigned int regs_offset, regs_cnt, reg_esize;
+	int i;
+
+	fadump_conf->cpu_state_entry_size =
+			be32_to_cpu(opal_cpu_metadata->cpu_data_size);
+	fadump_conf->cpu_state_destination_addr =
+			be64_to_cpu(opal_cpu_metadata->region[0].dest);
+	fadump_conf->cpu_state_data_size =
+			be64_to_cpu(opal_cpu_metadata->region[0].size);
+
+	if ((fadump_conf->cpu_state_destination_addr == 0) ||
+	    (fadump_conf->cpu_state_entry_size == 0)) {
+		pr_err("CPU state data not available for processing!\n");
+		return -ENODEV;
+	}
+
+	size_of_each_thread = fadump_conf->cpu_state_entry_size;
+	num_cpus = (fadump_conf->cpu_state_data_size / size_of_each_thread);
+
+	addr = fadump_conf->cpu_state_destination_addr;
+	bufp = __va(addr);
+
+	/*
+	 * Offset for register entries, entry size and registers count is
+	 * duplicated in every thread header in keeping with HDAT format.
+	 * Use these values from the first thread header.
+	 */
+	thdr = (struct hdat_fadump_thread_hdr *)bufp;
+	regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+		       be32_to_cpu(thdr->offset));
+	reg_esize = be32_to_cpu(thdr->esize);
+	regs_cnt  = be32_to_cpu(thdr->ecnt);
 
-	num_cpus = 1;
 	/* Allocate buffer to hold cpu crash notes. */
 	fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
 	fadump_conf->cpu_notes_buf_size =
@@ -309,10 +417,53 @@ static int __init opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
 	if (fadump_conf->fadumphdr_addr)
 		fdh = __va(fadump_conf->fadumphdr_addr);
 
-	if (fdh && (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)) {
-		note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
-		final_note(note_buf);
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("NumCpus     : %u\n", num_cpus);
+	pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+		 regs_offset, reg_esize, regs_cnt);
+
+	for (i = 0; i < num_cpus; i++, bufp += size_of_each_thread) {
+		thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+		thread_pir = be32_to_cpu(thdr->pir);
+		pr_debug("%04d) PIR: 0x%x, core state: 0x%02x\n",
+			 (i + 1), thread_pir, thdr->core_state);
+
+		/*
+		 * Register state data of MAX cores is provided by firmware,
+		 * but some of this cores may not be active. So, while
+		 * processing register state data, check core state and
+		 * skip threads that belong to inactive cores.
+		 */
+		if (is_thread_core_inactive(thdr->core_state))
+			continue;
+
+		/*
+		 * If this is kernel initiated crash, crashing_cpu would be set
+		 * appropriately and register data of the crashing CPU saved by
+		 * crashing kernel. Add this saved register data of crashing CPU
+		 * to elf notes and populate the pt_regs for the remaining CPUs
+		 * from register state data provided by firmware.
+		 */
+		if (fdh && (fdh->crashing_cpu == thread_pir)) {
+			note_buf = fadump_regs_to_elf_notes(note_buf,
+							    &fdh->regs);
+			pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+				 fdh->crashing_cpu, fdh->regs.gpr[1],
+				 fdh->regs.nip);
+			continue;
+		}
+
+		opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+				      reg_esize, &regs);
 
+		note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+		pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+			 thread_pir, regs.gpr[1], regs.nip);
+	}
+	final_note(note_buf);
+
+	if (fdh) {
 		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
 			 fdh->elfcorehdr_addr);
 		fadump_update_elfcore_header(fadump_conf,
@@ -327,7 +478,8 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf)
 	struct fadump_crash_info_header *fdh;
 	int rc = 0;
 
-	if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
+	if (!opal_fdm_active || !opal_cpu_metadata ||
+	    !fadump_conf->fadumphdr_addr)
 		return -EINVAL;
 
 	/* Validate the fadump crash info header */
@@ -337,13 +489,6 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf)
 		return -EINVAL;
 	}
 
-	/*
-	 * TODO: To build cpu notes, find a way to map PIR to logical id.
-	 *       Also, we may need different method for pseries and powernv.
-	 *       The currently booted kernel could have a different PIR to
-	 *       logical id mapping. So, try saving info of previous kernel's
-	 *       paca to get the right PIR to logical id mapping.
-	 */
 	rc = opal_fadump_build_cpu_notes(fadump_conf);
 	if (rc)
 		return rc;
@@ -397,6 +542,14 @@ static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
 {
 	int rc;
 
+	/*
+	 * Unlike on pSeries platform, logical CPU number is not provided
+	 * with architected register state data. So, store the crashing
+	 * CPU's PIR instead to plug the appropriate register data for
+	 * crashing CPU in the vmcore file.
+	 */
+	fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
 	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
 	if (rc == OPAL_UNSUPPORTED) {
 		pr_emerg("Reboot type %d not supported.\n",
@@ -449,6 +602,7 @@ int __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node)
 		u64 addr = 0;
 		s64 ret;
 		const struct opal_fadump_mem_struct *r_opal_fdm_active;
+		const struct opal_mpipl_fadump *r_opal_cpu_metadata;
 
 		ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
 		if ((ret != OPAL_SUCCESS) || !addr) {
@@ -473,6 +627,26 @@ int __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node)
 			return 1;
 		}
 
+		ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+		if ((ret != OPAL_SUCCESS) || !addr) {
+			pr_err("Failed to get CPU metadata (%lld)\n", ret);
+			return 1;
+		}
+
+		addr = be64_to_cpu(addr);
+		pr_debug("CPU metadata addr: %llx\n", addr);
+
+		opal_cpu_metadata = __va(addr);
+		r_opal_cpu_metadata = (void *)addr;
+		fadump_conf->cpu_state_data_version =
+			be32_to_cpu(r_opal_cpu_metadata->cpu_data_version);
+		if (fadump_conf->cpu_state_data_version !=
+		    HDAT_FADUMP_CPU_DATA_VERSION) {
+			pr_err("CPU data format version (%lu) mismatch!\n",
+			       fadump_conf->cpu_state_data_version);
+			return 1;
+		}
+
 		pr_info("Firmware-assisted dump is active.\n");
 		fadump_conf->dump_active = 1;
 		opal_fadump_get_config(fadump_conf, r_opal_fdm_active);
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
index 19cac1f..ce4c522 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.h
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -30,4 +30,43 @@ struct opal_fadump_mem_struct {
 	struct opal_mpipl_region	rgn[OPAL_FADUMP_MAX_MEM_REGS];
 } __attribute__((packed));
 
+/*
+ * CPU state data is provided by f/w. Below are the definitions
+ * provided in HDAT spec. Refer to latest HDAT specification for
+ * any update to this format.
+ */
+
+#define HDAT_FADUMP_CPU_DATA_VERSION		1
+
+#define HDAT_FADUMP_CORE_INACTIVE		(0x0F)
+
+/* HDAT thread header for register entries */
+struct hdat_fadump_thread_hdr {
+	__be32  pir;
+	/* 0x00 - 0x0F - The corresponding stop state of the core */
+	u8      core_state;
+	u8      reserved[3];
+
+	__be32	offset;	/* Offset to Register Entries array */
+	__be32	ecnt;	/* Number of entries */
+	__be32	esize;	/* Alloc size of each array entry in bytes */
+	__be32	eactsz;	/* Actual size of each array entry in bytes */
+} __attribute__((packed));
+
+/* Register types populated by f/w */
+#define HDAT_FADUMP_REG_TYPE_GPR		0x01
+#define HDAT_FADUMP_REG_TYPE_SPR		0x02
+
+/* ID numbers used by f/w while populating certain registers */
+#define HDAT_FADUMP_REG_ID_NIP			0x7D0
+#define HDAT_FADUMP_REG_ID_MSR			0x7D1
+#define HDAT_FADUMP_REG_ID_CCR			0x7D2
+
+/* HDAT register entry. */
+struct hdat_fadump_reg_entry {
+	__be32		reg_type;
+	__be32		reg_num;
+	__be64		reg_val;
+} __attribute__((packed));
+
 #endif /* __PPC64_OPAL_FA_DUMP_H__ */


  parent reply index

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-20 12:04 [PATCH v5 00/31] Add FADump support on PowerNV platform Hari Bathini
2019-08-20 12:04 ` [PATCH v5 01/31] powerpc/fadump: move internal macros/definitions to a new header Hari Bathini
2019-09-03 11:09   ` Michael Ellerman
2019-09-03 16:05     ` Hari Bathini
2019-08-20 12:04 ` [PATCH v5 02/31] powerpc/fadump: move internal code to a new file Hari Bathini
2019-09-03 11:09   ` Michael Ellerman
2019-09-03 16:05     ` Hari Bathini
2019-09-04  9:02       ` Mahesh Jagannath Salgaonkar
2019-09-04 18:26         ` Hari Bathini
2019-08-20 12:04 ` [PATCH v5 03/31] powerpc/fadump: Improve fadump documentation Hari Bathini
2019-08-20 12:04 ` [PATCH v5 04/31] pseries/fadump: move rtas specific definitions to platform code Hari Bathini
2019-08-20 12:04 ` [PATCH v5 05/31] pseries/fadump: introduce callbacks for platform specific operations Hari Bathini
2019-09-03 11:10   ` Michael Ellerman
2019-09-03 16:06     ` Hari Bathini
2019-09-06  6:39       ` Hari Bathini
2019-08-20 12:04 ` [PATCH v5 06/31] pseries/fadump: define register/un-register callback functions Hari Bathini
2019-09-03 11:10   ` Michael Ellerman
2019-09-03 17:15     ` Hari Bathini
2019-08-20 12:04 ` [PATCH v5 07/31] powerpc/fadump: release all the memory above boot memory size Hari Bathini
2019-09-03 11:10   ` Michael Ellerman
2019-09-03 16:27     ` Hari Bathini
2019-08-20 12:05 ` [PATCH v5 08/31] pseries/fadump: move out platform specific support from generic code Hari Bathini
2019-08-20 12:05 ` [PATCH v5 09/31] powerpc/fadump: use FADump instead of fadump for how it is pronounced Hari Bathini
2019-08-20 12:05 ` [PATCH v5 10/31] opal: add MPIPL interface definitions Hari Bathini
2019-09-03 11:10   ` Michael Ellerman
2019-09-03 16:28     ` Hari Bathini
2019-09-04 11:03       ` Michael Ellerman
2019-09-04 11:05   ` Michael Ellerman
2019-08-20 12:05 ` [PATCH v5 11/31] powernv/fadump: add fadump support on powernv Hari Bathini
2019-09-03 11:10   ` Michael Ellerman
2019-09-03 16:31     ` Hari Bathini
2019-09-04 14:33       ` Hari Bathini
2019-09-05  3:11         ` Michael Ellerman
2019-08-20 12:05 ` [PATCH v5 12/31] powernv/fadump: register kernel metadata address with opal Hari Bathini
2019-09-04 11:25   ` Michael Ellerman
2019-08-20 12:05 ` [PATCH v5 13/31] powernv/fadump: reset metadata address during clean up Hari Bathini
2019-08-27 12:00   ` Hari Bathini
2019-08-20 12:05 ` [PATCH v5 14/31] powernv/fadump: define register/un-register callback functions Hari Bathini
2019-09-05  4:15   ` Michael Ellerman
2019-09-05  7:23   ` Michael Ellerman
2019-09-05  9:54     ` Hari Bathini
2019-08-20 12:05 ` [PATCH v5 15/31] powernv/fadump: support copying multiple kernel boot memory regions Hari Bathini
2019-09-04 11:30   ` Michael Ellerman
2019-09-04 20:20     ` Hari Bathini
2019-09-05  3:13       ` Michael Ellerman
2019-08-20 12:06 ` [PATCH v5 16/31] powernv/fadump: process the crashdump by exporting it as /proc/vmcore Hari Bathini
2019-09-04 11:42   ` Michael Ellerman
2019-09-04 21:01     ` Hari Bathini
2019-08-20 12:06 ` [PATCH v5 17/31] powernv/fadump: Warn before processing partial crashdump Hari Bathini
2019-09-04 11:48   ` Michael Ellerman
2019-08-20 12:06 ` [PATCH v5 18/31] powernv/fadump: handle invalidation of crashdump and re-registraion Hari Bathini
2019-08-20 12:06 ` [PATCH v5 19/31] powerpc/fadump: Update documentation about OPAL platform support Hari Bathini
2019-09-04 11:51   ` Michael Ellerman
2019-09-04 12:08     ` Oliver O'Halloran
2019-09-05  3:15       ` Michael Ellerman
2019-08-20 12:06 ` [PATCH v5 20/31] powerpc/fadump: use smaller offset while finding memory for reservation Hari Bathini
2019-09-04 11:54   ` Michael Ellerman
2019-08-20 12:06 ` Hari Bathini [this message]
2019-09-04 12:20   ` [PATCH v5 21/31] powernv/fadump: process architected register state data provided by firmware Michael Ellerman
2019-09-09 13:23     ` Hari Bathini
2019-09-09 15:33       ` Oliver O'Halloran
2019-09-10  8:48         ` Hari Bathini
2019-09-10 14:05           ` Michael Ellerman
2019-09-10 16:10             ` Hari Bathini
2019-08-20 12:06 ` [PATCH v5 22/31] powerpc/fadump: make crash memory ranges array allocation generic Hari Bathini
2019-08-20 12:06 ` [PATCH v5 23/31] powerpc/fadump: consider reserved ranges while releasing memory Hari Bathini
2019-08-20 12:07 ` [PATCH v5 24/31] powerpc/fadump: improve how crashed kernel's memory is reserved Hari Bathini
2019-08-20 12:07 ` [PATCH v5 25/31] powernv/fadump: add support to preserve crash data on FADUMP disabled kernel Hari Bathini
2019-08-20 12:07 ` [PATCH v5 26/31] powerpc/fadump: update documentation about CONFIG_PRESERVE_FA_DUMP Hari Bathini
2019-08-20 12:07 ` [PATCH v5 27/31] powernv/opalcore: export /sys/firmware/opal/core for analysing opal crashes Hari Bathini
2019-08-20 12:07 ` [PATCH v5 28/31] powernv/opalcore: provide an option to invalidate /sys/firmware/opal/core file Hari Bathini
2019-08-20 12:07 ` [PATCH v5 29/31] powerpc/fadump: consider f/w load area Hari Bathini
2019-08-20 12:07 ` [PATCH v5 30/31] powernv/fadump: update documentation about option to release opalcore Hari Bathini
2019-08-20 12:07 ` [PATCH v5 31/31] powernv/fadump: support holes in kernel boot memory area Hari Bathini

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=156630280239.8896.11769233860624935762.stgit@hbathini.in.ibm.com \
    --to=hbathini@linux.ibm.com \
    --cc=ananth@linux.ibm.com \
    --cc=dja@axtens.net \
    --cc=hegdevasant@linux.ibm.com \
    --cc=linuxppc-dev@ozlabs.org \
    --cc=mahesh@linux.ibm.com \
    --cc=npiggin@gmail.com \
    --cc=oohall@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LinuxPPC-Dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linuxppc-dev/0 linuxppc-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linuxppc-dev linuxppc-dev/ https://lore.kernel.org/linuxppc-dev \
		linuxppc-dev@lists.ozlabs.org linuxppc-dev@ozlabs.org
	public-inbox-index linuxppc-dev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.ozlabs.lists.linuxppc-dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git