All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] makedumpfile: printk: add support for lockless ringbuffer
@ 2020-11-19  2:41 HAGIO KAZUHITO(萩尾 一仁)
  2020-11-20  2:04 ` John Ogness
  0 siblings, 1 reply; 8+ messages in thread
From: HAGIO KAZUHITO(萩尾 一仁) @ 2020-11-19  2:41 UTC (permalink / raw)
  To: John Ogness, kexec

From: John Ogness <john.ogness@linutronix.de>

Linux 5.10 introduces a new lockless ringbuffer.  The new ringbuffer
is structured completely differently from the previous iterations.
Add support for retrieving the ringbuffer from debug information
and/or using vmcoreinfo.  The new ringbuffer is detected based on
the availability of the "prb" symbol.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
I've updated John's RFC makedumpfile patch to match 5.10-rc4 kernel.
Changes from the RFC patch:
- followed the following kernel commit
    cfe2790b163a ("printk: move printk_info into separate array")
- divided members of struct printk_log in offset_table into each structure
  for readability
- added some error handling
- also dump the head record, which was previously missed

 Makefile       |   2 +-
 dwarf_info.c   |  36 +++++++++-
 makedumpfile.c | 103 +++++++++++++++++++++++++++--
 makedumpfile.h |  58 +++++++++++++++++
 printk.c       | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 395 insertions(+), 7 deletions(-)
 create mode 100644 printk.c

diff --git a/Makefile b/Makefile
index a0f69b4e1ad8..1fe8efe00dcb 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ CFLAGS_ARCH += -m32
 endif
 
 SRC_BASE = makedumpfile.c makedumpfile.h diskdump_mod.h sadump_mod.h sadump_info.h
-SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c
+SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c printk.c
 OBJ_PART=$(patsubst %.c,%.o,$(SRC_PART))
 SRC_ARCH = arch/arm.c arch/arm64.c arch/x86.c arch/x86_64.c arch/ia64.c arch/ppc64.c arch/s390x.c arch/ppc.c arch/sparc64.c
 OBJ_ARCH=$(patsubst %.c,%.o,$(SRC_ARCH))
diff --git a/dwarf_info.c b/dwarf_info.c
index e42a9f5857f3..543588ba369b 100644
--- a/dwarf_info.c
+++ b/dwarf_info.c
@@ -614,6 +614,7 @@ search_structure(Dwarf_Die *die, int *found)
 {
 	int tag;
 	const char *name;
+	Dwarf_Die die_type;
 
 	/*
 	 * If we get to here then we don't have any more
@@ -622,9 +623,31 @@ search_structure(Dwarf_Die *die, int *found)
 	do {
 		tag  = dwarf_tag(die);
 		name = dwarf_diename(die);
-		if ((tag != DW_TAG_structure_type) || (!name)
-		    || strcmp(name, dwarf_info.struct_name))
+		if ((!name) || strcmp(name, dwarf_info.struct_name))
+			continue;
+
+		if (tag == DW_TAG_typedef) {
+			if (!get_die_type(die, &die_type)) {
+				ERRMSG("Can't get CU die of DW_AT_type.\n");
+				break;
+			}
+
+			/* Resolve typedefs of typedefs. */
+			while ((tag = dwarf_tag(&die_type)) == DW_TAG_typedef) {
+				if (!get_die_type(&die_type, &die_type)) {
+					ERRMSG("Can't get CU die of DW_AT_type.\n");
+					return;
+				}
+			}
+
+			if (tag != DW_TAG_structure_type)
+				continue;
+			die = &die_type;
+
+		} else if (tag != DW_TAG_structure_type) {
 			continue;
+		}
+
 		/*
 		 * Skip if DW_AT_byte_size is not included.
 		 */
@@ -740,6 +763,15 @@ search_typedef(Dwarf_Die *die, int *found)
 				ERRMSG("Can't get CU die of DW_AT_type.\n");
 				break;
 			}
+
+			/* Resolve typedefs of typedefs. */
+			while ((tag = dwarf_tag(&die_type)) == DW_TAG_typedef) {
+				if (!get_die_type(&die_type, &die_type)) {
+					ERRMSG("Can't get CU die of DW_AT_type.\n");
+					return;
+				}
+			}
+
 			dwarf_info.struct_size = dwarf_bytesize(&die_type);
 			if (dwarf_info.struct_size <= 0)
 				continue;
diff --git a/makedumpfile.c b/makedumpfile.c
index cdde040e2133..061741f72a5c 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1555,6 +1555,7 @@ get_symbol_info(void)
 	SYMBOL_INIT(node_data, "node_data");
 	SYMBOL_INIT(pgdat_list, "pgdat_list");
 	SYMBOL_INIT(contig_page_data, "contig_page_data");
+	SYMBOL_INIT(prb, "prb");
 	SYMBOL_INIT(log_buf, "log_buf");
 	SYMBOL_INIT(log_buf_len, "log_buf_len");
 	SYMBOL_INIT(log_end, "log_end");
@@ -1971,16 +1972,47 @@ get_structure_info(void)
 	OFFSET_INIT(elf64_phdr.p_memsz, "elf64_phdr", "p_memsz");
 
 	SIZE_INIT(printk_log, "printk_log");
-	if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) {
+	SIZE_INIT(printk_ringbuffer, "printk_ringbuffer");
+	if ((SIZE(printk_ringbuffer) != NOT_FOUND_STRUCTURE)) {
+		info->flag_use_printk_ringbuffer = TRUE;
+		info->flag_use_printk_log = FALSE;
+
+		OFFSET_INIT(printk_ringbuffer.desc_ring, "printk_ringbuffer", "desc_ring");
+		OFFSET_INIT(printk_ringbuffer.text_data_ring, "printk_ringbuffer", "text_data_ring");
+
+		OFFSET_INIT(prb_desc_ring.count_bits, "prb_desc_ring", "count_bits");
+		OFFSET_INIT(prb_desc_ring.descs, "prb_desc_ring", "descs");
+		OFFSET_INIT(prb_desc_ring.infos, "prb_desc_ring", "infos");
+		OFFSET_INIT(prb_desc_ring.head_id, "prb_desc_ring", "head_id");
+		OFFSET_INIT(prb_desc_ring.tail_id, "prb_desc_ring", "tail_id");
+
+		SIZE_INIT(prb_desc, "prb_desc");
+		OFFSET_INIT(prb_desc.state_var, "prb_desc", "state_var");
+		OFFSET_INIT(prb_desc.text_blk_lpos, "prb_desc", "text_blk_lpos");
+
+		OFFSET_INIT(prb_data_blk_lpos.begin, "prb_data_blk_lpos", "begin");
+		OFFSET_INIT(prb_data_blk_lpos.next, "prb_data_blk_lpos", "next");
+
+		OFFSET_INIT(prb_data_ring.size_bits, "prb_data_ring", "size_bits");
+		OFFSET_INIT(prb_data_ring.data, "prb_data_ring", "data");
+
+		SIZE_INIT(printk_info, "printk_info");
+		OFFSET_INIT(printk_info.ts_nsec, "printk_info", "ts_nsec");
+		OFFSET_INIT(printk_info.text_len, "printk_info", "text_len");
+
+		OFFSET_INIT(atomic_long_t.counter, "atomic_long_t", "counter");
+	} else if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) {
 		/*
 		 * In kernel 3.11-rc4 the log structure name was renamed
 		 * to "printk_log".
 		 */
+		info->flag_use_printk_ringbuffer = FALSE;
 		info->flag_use_printk_log = TRUE;
 		OFFSET_INIT(printk_log.ts_nsec, "printk_log", "ts_nsec");
 		OFFSET_INIT(printk_log.len, "printk_log", "len");
 		OFFSET_INIT(printk_log.text_len, "printk_log", "text_len");
 	} else {
+		info->flag_use_printk_ringbuffer = FALSE;
 		info->flag_use_printk_log = FALSE;
 		SIZE_INIT(printk_log, "log");
 		OFFSET_INIT(printk_log.ts_nsec, "log", "ts_nsec");
@@ -2191,6 +2223,7 @@ write_vmcoreinfo_data(void)
 	WRITE_SYMBOL("node_data", node_data);
 	WRITE_SYMBOL("pgdat_list", pgdat_list);
 	WRITE_SYMBOL("contig_page_data", contig_page_data);
+	WRITE_SYMBOL("prb", prb);
 	WRITE_SYMBOL("log_buf", log_buf);
 	WRITE_SYMBOL("log_buf_len", log_buf_len);
 	WRITE_SYMBOL("log_end", log_end);
@@ -2222,7 +2255,11 @@ write_vmcoreinfo_data(void)
 	WRITE_STRUCTURE_SIZE("node_memblk_s", node_memblk_s);
 	WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t);
 	WRITE_STRUCTURE_SIZE("pageflags", pageflags);
-	if (info->flag_use_printk_log)
+	if (info->flag_use_printk_ringbuffer) {
+		WRITE_STRUCTURE_SIZE("printk_ringbuffer", printk_ringbuffer);
+		WRITE_STRUCTURE_SIZE("prb_desc", prb_desc);
+		WRITE_STRUCTURE_SIZE("printk_info", printk_info);
+	} else if (info->flag_use_printk_log)
 		WRITE_STRUCTURE_SIZE("printk_log", printk_log);
 	else
 		WRITE_STRUCTURE_SIZE("log", printk_log);
@@ -2268,7 +2305,30 @@ write_vmcoreinfo_data(void)
 	WRITE_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr);
 	WRITE_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start);
 	WRITE_MEMBER_OFFSET("vmap_area.list", vmap_area.list);
-	if (info->flag_use_printk_log) {
+	if (info->flag_use_printk_ringbuffer) {
+		WRITE_MEMBER_OFFSET("printk_ringbuffer.desc_ring", printk_ringbuffer.desc_ring);
+		WRITE_MEMBER_OFFSET("printk_ringbuffer.text_data_ring", printk_ringbuffer.text_data_ring);
+
+		WRITE_MEMBER_OFFSET("prb_desc_ring.count_bits", prb_desc_ring.count_bits);
+		WRITE_MEMBER_OFFSET("prb_desc_ring.descs", prb_desc_ring.descs);
+		WRITE_MEMBER_OFFSET("prb_desc_ring.infos", prb_desc_ring.infos);
+		WRITE_MEMBER_OFFSET("prb_desc_ring.head_id", prb_desc_ring.head_id);
+		WRITE_MEMBER_OFFSET("prb_desc_ring.tail_id", prb_desc_ring.tail_id);
+
+		WRITE_MEMBER_OFFSET("prb_desc.state_var", prb_desc.state_var);
+		WRITE_MEMBER_OFFSET("prb_desc.text_blk_lpos", prb_desc.text_blk_lpos);
+
+		WRITE_MEMBER_OFFSET("prb_data_blk_lpos.begin", prb_data_blk_lpos.begin);
+		WRITE_MEMBER_OFFSET("prb_data_blk_lpos.next", prb_data_blk_lpos.next);
+
+		WRITE_MEMBER_OFFSET("prb_data_ring.size_bits", prb_data_ring.size_bits);
+		WRITE_MEMBER_OFFSET("prb_data_ring.data", prb_data_ring.data);
+
+		WRITE_MEMBER_OFFSET("printk_info.ts_nsec", printk_info.ts_nsec);
+		WRITE_MEMBER_OFFSET("printk_info.text_len", printk_info.text_len);
+
+		WRITE_MEMBER_OFFSET("atomic_long_t.counter", atomic_long_t.counter);
+	} else if (info->flag_use_printk_log) {
 		WRITE_MEMBER_OFFSET("printk_log.ts_nsec", printk_log.ts_nsec);
 		WRITE_MEMBER_OFFSET("printk_log.len", printk_log.len);
 		WRITE_MEMBER_OFFSET("printk_log.text_len", printk_log.text_len);
@@ -2606,6 +2666,7 @@ read_vmcoreinfo(void)
 	READ_SYMBOL("node_data", node_data);
 	READ_SYMBOL("pgdat_list", pgdat_list);
 	READ_SYMBOL("contig_page_data", contig_page_data);
+	READ_SYMBOL("prb", prb);
 	READ_SYMBOL("log_buf", log_buf);
 	READ_SYMBOL("log_buf_len", log_buf_len);
 	READ_SYMBOL("log_end", log_end);
@@ -2684,12 +2745,43 @@ read_vmcoreinfo(void)
 	READ_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features);
 
 	READ_STRUCTURE_SIZE("printk_log", printk_log);
-	if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) {
+	READ_STRUCTURE_SIZE("printk_ringbuffer", printk_ringbuffer);
+	if (SIZE(printk_ringbuffer) != NOT_FOUND_STRUCTURE) {
+		info->flag_use_printk_ringbuffer = TRUE;
+		info->flag_use_printk_log = FALSE;
+
+		READ_MEMBER_OFFSET("printk_ringbuffer.desc_ring", printk_ringbuffer.desc_ring);
+		READ_MEMBER_OFFSET("printk_ringbuffer.text_data_ring", printk_ringbuffer.text_data_ring);
+
+		READ_MEMBER_OFFSET("prb_desc_ring.count_bits", prb_desc_ring.count_bits);
+		READ_MEMBER_OFFSET("prb_desc_ring.descs", prb_desc_ring.descs);
+		READ_MEMBER_OFFSET("prb_desc_ring.infos", prb_desc_ring.infos);
+		READ_MEMBER_OFFSET("prb_desc_ring.head_id", prb_desc_ring.head_id);
+		READ_MEMBER_OFFSET("prb_desc_ring.tail_id", prb_desc_ring.tail_id);
+
+		READ_STRUCTURE_SIZE("prb_desc", prb_desc);
+		READ_MEMBER_OFFSET("prb_desc.state_var", prb_desc.state_var);
+		READ_MEMBER_OFFSET("prb_desc.text_blk_lpos", prb_desc.text_blk_lpos);
+
+		READ_MEMBER_OFFSET("prb_data_blk_lpos.begin", prb_data_blk_lpos.begin);
+		READ_MEMBER_OFFSET("prb_data_blk_lpos.next", prb_data_blk_lpos.next);
+
+		READ_MEMBER_OFFSET("prb_data_ring.size_bits", prb_data_ring.size_bits);
+		READ_MEMBER_OFFSET("prb_data_ring.data", prb_data_ring.data);
+
+		READ_STRUCTURE_SIZE("printk_info", printk_info);
+		READ_MEMBER_OFFSET("printk_info.ts_nsec", printk_info.ts_nsec);
+		READ_MEMBER_OFFSET("printk_info.text_len", printk_info.text_len);
+
+		READ_MEMBER_OFFSET("atomic_long_t.counter", atomic_long_t.counter);
+	} else if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) {
+		info->flag_use_printk_ringbuffer = FALSE;
 		info->flag_use_printk_log = TRUE;
 		READ_MEMBER_OFFSET("printk_log.ts_nsec", printk_log.ts_nsec);
 		READ_MEMBER_OFFSET("printk_log.len", printk_log.len);
 		READ_MEMBER_OFFSET("printk_log.text_len", printk_log.text_len);
 	} else {
+		info->flag_use_printk_ringbuffer = FALSE;
 		info->flag_use_printk_log = FALSE;
 		READ_STRUCTURE_SIZE("log", printk_log);
 		READ_MEMBER_OFFSET("log.ts_nsec", printk_log.ts_nsec);
@@ -5286,6 +5378,9 @@ dump_dmesg()
 	if (!initial())
 		return FALSE;
 
+	if ((SYMBOL(prb) != NOT_FOUND_SYMBOL))
+		return dump_lockless_dmesg();
+
 	if ((SYMBOL(log_buf) == NOT_FOUND_SYMBOL)
 	    || (SYMBOL(log_buf_len) == NOT_FOUND_SYMBOL)) {
 		ERRMSG("Can't find some symbols for log_buf.\n");
diff --git a/makedumpfile.h b/makedumpfile.h
index 698c0547c0f2..47f7e7999014 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -1317,6 +1317,7 @@ struct DumpInfo {
 	int             flag_partial_dmesg;  /* dmesg dump only from the last cleared index*/
 	int             flag_mem_usage;  /*show the page number of memory in different use*/
 	int		flag_use_printk_log; /* did we read printk_log symbol name? */
+	int		flag_use_printk_ringbuffer; /* using lockless printk ringbuffer? */
 	int		flag_nospace;	     /* the flag of "No space on device" error */
 	int		flag_vmemmap;        /* kernel supports vmemmap address space */
 	int		flag_excludevm;      /* -e - excluding unused vmemmap pages */
@@ -1602,6 +1603,7 @@ struct symbol_table {
 	unsigned long long	node_data;
 	unsigned long long	pgdat_list;
 	unsigned long long	contig_page_data;
+	unsigned long long	prb;
 	unsigned long long	log_buf;
 	unsigned long long	log_buf_len;
 	unsigned long long	log_end;
@@ -1690,6 +1692,13 @@ struct size_table {
 	long	printk_log;
 
 	/*
+	 * for lockless printk ringbuffer
+	 */
+	long	printk_ringbuffer;
+	long	prb_desc;
+	long	printk_info;
+
+	/*
 	 * for Xen extraction
 	 */
 	long	page_info;
@@ -1865,6 +1874,52 @@ struct offset_table {
 	} printk_log;
 
 	/*
+	 * for lockless printk ringbuffer
+	 */
+	struct printk_ringbuffer_s {
+		long desc_ring;
+		long text_data_ring;
+		long fail;
+	} printk_ringbuffer;
+
+	struct prb_desc_ring_s {
+		long count_bits;
+		long descs;
+		long infos;
+		long head_id;
+		long tail_id;
+	} prb_desc_ring;
+
+	struct prb_desc_s {
+		long state_var;
+		long text_blk_lpos;
+	} prb_desc;
+
+	struct prb_data_blk_lpos_s {
+		long begin;
+		long next;
+	} prb_data_blk_lpos;
+
+	struct printk_info_s {
+		long seq;
+		long ts_nsec;
+		long text_len;
+		long caller_id;
+		long dev_info;
+	} printk_info;
+
+	struct prb_data_ring_s {
+		long size_bits;
+		long data;
+		long head_lpos;
+		long tail_lpos;
+	} prb_data_ring;
+
+	struct atomic_long_t_s {
+		long counter;
+	} atomic_long_t;
+
+	/*
 	 * symbols on ppc64 arch
 	 */
 	struct mmu_psize_def_s {
@@ -2390,4 +2445,7 @@ int hexadecimal(char *s, int count);
 int decimal(char *s, int count);
 int file_exists(char *file);
 
+int open_dump_file(void);
+int dump_lockless_dmesg(void);
+
 #endif /* MAKEDUMPFILE_H */
diff --git a/printk.c b/printk.c
new file mode 100644
index 000000000000..8e00901d9494
--- /dev/null
+++ b/printk.c
@@ -0,0 +1,203 @@
+#include "makedumpfile.h"
+#include <ctype.h>
+
+#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
+#define DESC_COMMITTED_MASK	(1UL << (DESC_SV_BITS - 1))
+#define DESC_REUSE_MASK		(1UL << (DESC_SV_BITS - 2))
+#define DESC_FLAGS_MASK		(DESC_COMMITTED_MASK | DESC_REUSE_MASK)
+#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
+
+/* convenience struct for passing many values to helper functions */
+struct prb_map {
+	char		*prb;
+
+	char		*desc_ring;
+	unsigned long	desc_ring_count;
+	char		*descs;
+	char		*infos;
+
+	char		*text_data_ring;
+	unsigned long	text_data_ring_size;
+	char		*text_data;
+};
+
+static void
+dump_record(struct prb_map *m, unsigned long id)
+{
+	unsigned long long ts_nsec;
+	unsigned long state_var;
+	unsigned short text_len;
+	unsigned long begin;
+	unsigned long next;
+	char buf[BUFSIZE];
+	ulonglong nanos;
+	int indent_len;
+	int buf_need;
+	char *bufp;
+	char *text;
+	char *desc;
+	char *inf;
+	ulong rem;
+	char *p;
+	int i;
+
+	desc = m->descs + ((id % m->desc_ring_count) * SIZE(prb_desc));
+
+	/* skip non-committed record */
+	state_var = ULONG(desc + OFFSET(prb_desc.state_var) + OFFSET(atomic_long_t.counter));
+	if ((state_var & DESC_FLAGS_MASK) != DESC_COMMITTED_MASK)
+		return;
+
+	begin = ULONG(desc + OFFSET(prb_desc.text_blk_lpos) + OFFSET(prb_data_blk_lpos.begin)) %
+			m->text_data_ring_size;
+	next = ULONG(desc + OFFSET(prb_desc.text_blk_lpos) + OFFSET(prb_data_blk_lpos.next)) %
+			m->text_data_ring_size;
+
+	/* skip data-less text blocks */
+	if (begin == next)
+		return;
+
+	inf = m->infos + ((id % m->desc_ring_count) * SIZE(printk_info));
+
+	text_len = USHORT(inf + OFFSET(printk_info.text_len));
+
+	/* handle wrapping data block */
+	if (begin > next)
+		begin = 0;
+
+	/* skip over descriptor ID */
+	begin += sizeof(unsigned long);
+
+	/* handle truncated messages */
+	if (next - begin < text_len)
+		text_len = next - begin;
+
+	text = m->text_data + begin;
+
+	ts_nsec = ULONGLONG(inf + OFFSET(printk_info.ts_nsec));
+	nanos = (ulonglong)ts_nsec / (ulonglong)1000000000;
+	rem = (ulonglong)ts_nsec % (ulonglong)1000000000;
+
+	bufp = buf;
+	bufp += sprintf(buf, "[%5lld.%06ld] ", nanos, rem/1000);
+	indent_len = strlen(buf);
+
+	/* How much buffer space is needed in the worst case */
+	buf_need = MAX(sizeof("\\xXX\n"), sizeof("\n") + indent_len);
+
+	for (i = 0, p = text; i < text_len; i++, p++) {
+		if (bufp - buf >= sizeof(buf) - buf_need) {
+			if (write(info->fd_dumpfile, buf, bufp - buf) < 0)
+				return;
+			bufp = buf;
+		}
+
+		if (*p == '\n')
+			bufp += sprintf(bufp, "\n%-*s", indent_len, "");
+		else if (isprint(*p) || isspace(*p))
+			*bufp++ = *p;
+		else
+			bufp += sprintf(bufp, "\\x%02x", *p);
+	}
+
+	*bufp++ = '\n';
+
+	write(info->fd_dumpfile, buf, bufp - buf);
+}
+
+int
+dump_lockless_dmesg(void)
+{
+	unsigned long head_id;
+	unsigned long tail_id;
+	unsigned long kaddr;
+	unsigned long id;
+	struct prb_map m;
+	int ret = FALSE;
+
+	/* setup printk_ringbuffer */
+	if (!readmem(VADDR, SYMBOL(prb), &kaddr, sizeof(kaddr))) {
+		ERRMSG("Can't get the prb address.\n");
+		return ret;
+	}
+
+	m.prb = malloc(SIZE(printk_ringbuffer));
+	if (!m.prb) {
+		ERRMSG("Can't allocate memory for prb.\n");
+		return ret;
+	}
+	if (!readmem(VADDR, kaddr, m.prb, SIZE(printk_ringbuffer))) {
+		ERRMSG("Can't get prb.\n");
+		goto out_prb;
+	}
+
+	/* setup descriptor ring */
+	m.desc_ring = m.prb + OFFSET(printk_ringbuffer.desc_ring);
+	m.desc_ring_count = 1 << UINT(m.desc_ring + OFFSET(prb_desc_ring.count_bits));
+
+	kaddr = ULONG(m.desc_ring + OFFSET(prb_desc_ring.descs));
+	m.descs = malloc(SIZE(prb_desc) * m.desc_ring_count);
+	if (!m.descs) {
+		ERRMSG("Can't allocate memory for prb.desc_ring.descs.\n");
+		goto out_prb;
+	}
+	if (!readmem(VADDR, kaddr, m.descs,
+		     SIZE(prb_desc) * m.desc_ring_count)) {
+		ERRMSG("Can't get prb.desc_ring.descs.\n");
+		goto out_descs;
+	}
+
+	kaddr = ULONG(m.desc_ring + OFFSET(prb_desc_ring.infos));
+	m.infos = malloc(SIZE(printk_info) * m.desc_ring_count);
+	if (!m.infos) {
+		ERRMSG("Can't allocate memory for prb.desc_ring.infos.\n");
+		goto out_descs;
+	}
+	if (!readmem(VADDR, kaddr, m.infos, SIZE(printk_info) * m.desc_ring_count)) {
+		ERRMSG("Can't get prb.desc_ring.infos.\n");
+		goto out_infos;
+	}
+
+	/* setup text data ring */
+	m.text_data_ring = m.prb + OFFSET(printk_ringbuffer.text_data_ring);
+	m.text_data_ring_size = 1 << UINT(m.text_data_ring + OFFSET(prb_data_ring.size_bits));
+
+	kaddr = ULONG(m.text_data_ring + OFFSET(prb_data_ring.data));
+	m.text_data = malloc(m.text_data_ring_size);
+	if (!readmem(VADDR, kaddr, m.text_data, m.text_data_ring_size)) {
+		ERRMSG("Can't get prb.text_data_ring.\n");
+		goto out_text_data;
+	}
+
+	/* ready to go */
+
+	tail_id = ULONG(m.desc_ring + OFFSET(prb_desc_ring.tail_id) +
+			OFFSET(atomic_long_t.counter));
+	head_id = ULONG(m.desc_ring + OFFSET(prb_desc_ring.head_id) +
+			OFFSET(atomic_long_t.counter));
+
+	if (!open_dump_file()) {
+		ERRMSG("Can't open output file.\n");
+		goto out_text_data;
+	}
+
+	for (id = tail_id; id != head_id; id = (id + 1) & DESC_ID_MASK)
+		dump_record(&m, id);
+
+	/* dump head record */
+	dump_record(&m, id);
+
+	if (!close_files_for_creating_dumpfile())
+		goto out_text_data;
+
+	ret = TRUE;
+out_text_data:
+	free(m.text_data);
+out_infos:
+	free(m.infos);
+out_descs:
+	free(m.descs);
+out_prb:
+	free(m.prb);
+	return ret;
+}
-- 
1.8.3.1


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-19  2:41 [PATCH] makedumpfile: printk: add support for lockless ringbuffer HAGIO KAZUHITO(萩尾 一仁)
@ 2020-11-20  2:04 ` John Ogness
  2020-11-20  6:16   ` HAGIO KAZUHITO(萩尾 一仁)
  0 siblings, 1 reply; 8+ messages in thread
From: John Ogness @ 2020-11-20  2:04 UTC (permalink / raw)
  To: HAGIO KAZUHITO(萩尾 一仁), kexec

On 2020-11-19, HAGIO KAZUHITO(萩尾 一仁)	<k-hagio-ab@nec.com> wrote:
> From: John Ogness <john.ogness@linutronix.de>
>
> Linux 5.10 introduces a new lockless ringbuffer.  The new ringbuffer
> is structured completely differently from the previous iterations.
> Add support for retrieving the ringbuffer from debug information
> and/or using vmcoreinfo.  The new ringbuffer is detected based on
> the availability of the "prb" symbol.
>
> Signed-off-by: John Ogness <john.ogness@linutronix.de>
> Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
> ---
> I've updated John's RFC makedumpfile patch to match 5.10-rc4 kernel.
> Changes from the RFC patch:
> - followed the following kernel commit
>     cfe2790b163a ("printk: move printk_info into separate array")
> - divided members of struct printk_log in offset_table into each structure
>   for readability
> - added some error handling
> - also dump the head record, which was previously missed

I confirm that these changes are correct. Thanks for updating this,
adding the needed error handling, and catching that the head record was
missed!

I tested this by:

1. Boot kernel with: crashkernel=512M

2. Setup and trigger crash:

   kexec -p /boot/bzImage --initrd=/boot/rescue-initrd --append="console=ttyS0,115200"
   echo c > /proc/sysrq-trigger

3. From rescue environment, copy crashed vmcore to external machine:

   cp /proc/vmcore /remote/nfs/mount/

4. From external machine, extract kernel log using vmcoreinfo:

   makedumpfile -g ./vmcoreinfo -x ./vmlinux
   makedumpfile --dump-dmesg -i ./vmcoreinfo ./vmcore dmesg1.txt

5. From external machine, extract kernel log using debug symbols:

   makedumpfile --dump-dmesg -x ./vmlinux ./vmcore dmesg2.txt

6. Compare and inspect the kernel logs:

   diff dmesg1.txt dmesg2.txt
   cat dmesg1.txt

John Ogness

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-20  2:04 ` John Ogness
@ 2020-11-20  6:16   ` HAGIO KAZUHITO(萩尾 一仁)
  2020-11-24 10:41     ` John Ogness
  0 siblings, 1 reply; 8+ messages in thread
From: HAGIO KAZUHITO(萩尾 一仁) @ 2020-11-20  6:16 UTC (permalink / raw)
  To: John Ogness, kexec


-----Original Message-----
> On 2020-11-19, HAGIO KAZUHITO(萩尾 一仁)	<k-hagio-ab@nec.com> wrote:
> > From: John Ogness <john.ogness@linutronix.de>
> >
> > Linux 5.10 introduces a new lockless ringbuffer.  The new ringbuffer
> > is structured completely differently from the previous iterations.
> > Add support for retrieving the ringbuffer from debug information
> > and/or using vmcoreinfo.  The new ringbuffer is detected based on
> > the availability of the "prb" symbol.
> >
> > Signed-off-by: John Ogness <john.ogness@linutronix.de>
> > Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
> > ---
> > I've updated John's RFC makedumpfile patch to match 5.10-rc4 kernel.
> > Changes from the RFC patch:
> > - followed the following kernel commit
> >     cfe2790b163a ("printk: move printk_info into separate array")
> > - divided members of struct printk_log in offset_table into each structure
> >   for readability
> > - added some error handling
> > - also dump the head record, which was previously missed
> 
> I confirm that these changes are correct. Thanks for updating this,
> adding the needed error handling, and catching that the head record was
> missed!

Hi John,

Thank you for confirming and testing.
I will merge this after a few slight fixes and more tests.

I've updated your crash patch [1] as well; it would be helpful if you
could check it.
[1] https://www.redhat.com/archives/crash-utility/2020-November/msg00065.html

Thanks,
Kazu


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-20  6:16   ` HAGIO KAZUHITO(萩尾 一仁)
@ 2020-11-24 10:41     ` John Ogness
  2020-11-24 23:51       ` HAGIO KAZUHITO(萩尾 一仁)
  0 siblings, 1 reply; 8+ messages in thread
From: John Ogness @ 2020-11-24 10:41 UTC (permalink / raw)
  To: HAGIO KAZUHITO(萩尾 一仁), kexec

Hi Kazu,

On 2020-11-20, HAGIO KAZUHITO(萩尾 一仁)	<k-hagio-ab@nec.com> wrote:
> Thank you for confirming and testing.
> I will merge this after a few slight fixes and more tests.

After looking more closely, I see that your patch is still using the old
state flags. With the current version, there is now a value-based state
field. Both state values 1 (committed) and 2 (finalized) are valid for
printing. Should I submit a follow-up patch? Or are these the "slight
fixes" you are referring to?

John Ogness

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-24 10:41     ` John Ogness
@ 2020-11-24 23:51       ` HAGIO KAZUHITO(萩尾 一仁)
  2020-11-25  9:07         ` John Ogness
  0 siblings, 1 reply; 8+ messages in thread
From: HAGIO KAZUHITO(萩尾 一仁) @ 2020-11-24 23:51 UTC (permalink / raw)
  To: John Ogness, kexec

Hi John,

-----Original Message-----
> Hi Kazu,
> 
> On 2020-11-20, HAGIO KAZUHITO(萩尾 一仁)	<k-hagio-ab@nec.com> wrote:
> > Thank you for confirming and testing.
> > I will merge this after a few slight fixes and more tests.
> 
> After looking more closely, I see that your patch is still using the old
> state flags. With the current version, there is now a value-based state
> field. Both state values 1 (committed) and 2 (finalized) are valid for
> printing. Should I submit a follow-up patch? Or are these the "slight
> fixes" you are referring to?

Thank you for pointing it out!  Could you submit a follow-up patch?
It would be very helpful.

Thanks,
Kazu
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-24 23:51       ` HAGIO KAZUHITO(萩尾 一仁)
@ 2020-11-25  9:07         ` John Ogness
  2020-11-26  0:14           ` HAGIO KAZUHITO(萩尾 一仁)
  0 siblings, 1 reply; 8+ messages in thread
From: John Ogness @ 2020-11-25  9:07 UTC (permalink / raw)
  To: HAGIO KAZUHITO(萩尾 一仁), kexec

[-- Attachment #1: Type: text/plain, Size: 440 bytes --]

On 2020-11-24, HAGIO KAZUHITO(萩尾 一仁)	<k-hagio-ab@nec.com> wrote:
>> After looking more closely, I see that your patch is still using the
>> old state flags. With the current version, there is now a value-based
>> state field.
>
> Thank you for pointing it out!  Could you submit a follow-up patch?

I have attached a follow-up patch. It is pretty much the exact same
patch as the one I sent for "crash".

John Ogness


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-printk-use-committed-finalized-state-values.patch --]
[-- Type: text/x-diff, Size: 3071 bytes --]

From 58396867cb3bfd1ca060cf5eb3a910d7f8c192c2 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 25 Nov 2020 10:10:31 +0106
Subject: [PATCH] printk: use committed/finalized state values

The ringbuffer entries use 2 state values (committed and finalized)
rather than a single flag to represent being available for reading.
Copy the definitions and state lookup function directly from the
kernel source and use the new states.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 printk.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/printk.c b/printk.c
index 8e00901..9cecbd1 100644
--- a/printk.c
+++ b/printk.c
@@ -1,12 +1,6 @@
 #include "makedumpfile.h"
 #include <ctype.h>
 
-#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
-#define DESC_COMMITTED_MASK	(1UL << (DESC_SV_BITS - 1))
-#define DESC_REUSE_MASK		(1UL << (DESC_SV_BITS - 2))
-#define DESC_FLAGS_MASK		(DESC_COMMITTED_MASK | DESC_REUSE_MASK)
-#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
-
 /* convenience struct for passing many values to helper functions */
 struct prb_map {
 	char		*prb;
@@ -21,12 +15,51 @@ struct prb_map {
 	char		*text_data;
 };
 
+/*
+ * desc_state and DESC_* definitions taken from kernel source:
+ *
+ * kernel/printk/printk_ringbuffer.h
+ */
+
+/* The possible responses of a descriptor state-query. */
+enum desc_state {
+	desc_miss	=  -1,	/* ID mismatch (pseudo state) */
+	desc_reserved	= 0x0,	/* reserved, in use by writer */
+	desc_committed	= 0x1,	/* committed by writer, could get reopened */
+	desc_finalized	= 0x2,	/* committed, no further modification allowed */
+	desc_reusable	= 0x3,	/* free, not yet used by any writer */
+};
+
+#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
+#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
+#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
+#define DESC_STATE(sv)		(3UL & (sv >> DESC_FLAGS_SHIFT))
+#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
+#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
+
+/*
+ * get_desc_state() taken from kernel source:
+ *
+ * kernel/printk/printk_ringbuffer.c
+ */
+
+/* Query the state of a descriptor. */
+static enum desc_state get_desc_state(unsigned long id,
+				      unsigned long state_val)
+{
+	if (id != DESC_ID(state_val))
+		return desc_miss;
+
+	return DESC_STATE(state_val);
+}
+
 static void
 dump_record(struct prb_map *m, unsigned long id)
 {
 	unsigned long long ts_nsec;
 	unsigned long state_var;
 	unsigned short text_len;
+	enum desc_state state;
 	unsigned long begin;
 	unsigned long next;
 	char buf[BUFSIZE];
@@ -45,7 +78,8 @@ dump_record(struct prb_map *m, unsigned long id)
 
 	/* skip non-committed record */
 	state_var = ULONG(desc + OFFSET(prb_desc.state_var) + OFFSET(atomic_long_t.counter));
-	if ((state_var & DESC_FLAGS_MASK) != DESC_COMMITTED_MASK)
+	state = get_desc_state(id, state_var);
+	if (state != desc_committed && state != desc_finalized)
 		return;
 
 	begin = ULONG(desc + OFFSET(prb_desc.text_blk_lpos) + OFFSET(prb_data_blk_lpos.begin)) %
-- 
2.20.1


[-- Attachment #3: Type: text/plain, Size: 143 bytes --]

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-25  9:07         ` John Ogness
@ 2020-11-26  0:14           ` HAGIO KAZUHITO(萩尾 一仁)
  2020-11-27  0:00             ` HAGIO KAZUHITO(萩尾 一仁)
  0 siblings, 1 reply; 8+ messages in thread
From: HAGIO KAZUHITO(萩尾 一仁) @ 2020-11-26  0:14 UTC (permalink / raw)
  To: John Ogness, kexec

-----Original Message-----
> On 2020-11-24, HAGIO KAZUHITO(萩尾 一仁)	<k-hagio-ab@nec.com> wrote:
> >> After looking more closely, I see that your patch is still using the
> >> old state flags. With the current version, there is now a value-based
> >> state field.
> >
> > Thank you for pointing it out!  Could you submit a follow-up patch?
> 
> I have attached a follow-up patch. It is pretty much the exact same
> patch as the one I sent for "crash".

Thank you for the patch again, John.  I appreciate it.  Will merge.

Apparently an email with an attached file is held by the list system, so
I am unfolding it below to share and record it, just in case.

Thanks,
Kazu


From 58396867cb3bfd1ca060cf5eb3a910d7f8c192c2 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 25 Nov 2020 10:10:31 +0106
Subject: [PATCH] printk: use committed/finalized state values

The ringbuffer entries use 2 state values (committed and finalized)
rather than a single flag to represent being available for reading.
Copy the definitions and state lookup function directly from the
kernel source and use the new states.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 printk.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/printk.c b/printk.c
index 8e00901..9cecbd1 100644
--- a/printk.c
+++ b/printk.c
@@ -1,12 +1,6 @@
 #include "makedumpfile.h"
 #include <ctype.h>
 
-#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
-#define DESC_COMMITTED_MASK	(1UL << (DESC_SV_BITS - 1))
-#define DESC_REUSE_MASK		(1UL << (DESC_SV_BITS - 2))
-#define DESC_FLAGS_MASK		(DESC_COMMITTED_MASK | DESC_REUSE_MASK)
-#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
-
 /* convenience struct for passing many values to helper functions */
 struct prb_map {
 	char		*prb;
@@ -21,12 +15,51 @@ struct prb_map {
 	char		*text_data;
 };
 
+/*
+ * desc_state and DESC_* definitions taken from kernel source:
+ *
+ * kernel/printk/printk_ringbuffer.h
+ */
+
+/* The possible responses of a descriptor state-query. */
+enum desc_state {
+	desc_miss	=  -1,	/* ID mismatch (pseudo state) */
+	desc_reserved	= 0x0,	/* reserved, in use by writer */
+	desc_committed	= 0x1,	/* committed by writer, could get reopened */
+	desc_finalized	= 0x2,	/* committed, no further modification allowed */
+	desc_reusable	= 0x3,	/* free, not yet used by any writer */
+};
+
+#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
+#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
+#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
+#define DESC_STATE(sv)		(3UL & (sv >> DESC_FLAGS_SHIFT))
+#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
+#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
+
+/*
+ * get_desc_state() taken from kernel source:
+ *
+ * kernel/printk/printk_ringbuffer.c
+ */
+
+/* Query the state of a descriptor. */
+static enum desc_state get_desc_state(unsigned long id,
+				      unsigned long state_val)
+{
+	if (id != DESC_ID(state_val))
+		return desc_miss;
+
+	return DESC_STATE(state_val);
+}
+
 static void
 dump_record(struct prb_map *m, unsigned long id)
 {
 	unsigned long long ts_nsec;
 	unsigned long state_var;
 	unsigned short text_len;
+	enum desc_state state;
 	unsigned long begin;
 	unsigned long next;
 	char buf[BUFSIZE];
@@ -45,7 +78,8 @@ dump_record(struct prb_map *m, unsigned long id)
 
 	/* skip non-committed record */
 	state_var = ULONG(desc + OFFSET(prb_desc.state_var) + OFFSET(atomic_long_t.counter));
-	if ((state_var & DESC_FLAGS_MASK) != DESC_COMMITTED_MASK)
+	state = get_desc_state(id, state_var);
+	if (state != desc_committed && state != desc_finalized)
 		return;
 
 	begin = ULONG(desc + OFFSET(prb_desc.text_blk_lpos) + OFFSET(prb_data_blk_lpos.begin)) %
-- 
2.20.1

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [PATCH] makedumpfile: printk: add support for lockless ringbuffer
  2020-11-26  0:14           ` HAGIO KAZUHITO(萩尾 一仁)
@ 2020-11-27  0:00             ` HAGIO KAZUHITO(萩尾 一仁)
  0 siblings, 0 replies; 8+ messages in thread
From: HAGIO KAZUHITO(萩尾 一仁) @ 2020-11-27  0:00 UTC (permalink / raw)
  To: John Ogness, kexec

-----Original Message-----
> -----Original Message-----
> > On 2020-11-24, HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab@nec.com> wrote:
> > >> After looking more closely, I see that your patch is still using the
> > >> old state flags. With the current version, there is now a value-based
> > >> state field.
> > >
> > > Thank you for pointing it out!  Could you submit a follow-up patch?
> >
> > I have attached a follow-up patch. It is pretty much the exact same
> > patch as the one I sent for "crash".

Merged those patches.
https://github.com/makedumpfile/makedumpfile/compare/0188ea9...44b073b

Thanks,
Kazu

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-11-27  0:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-19  2:41 [PATCH] makedumpfile: printk: add support for lockless ringbuffer HAGIO KAZUHITO(萩尾 一仁)
2020-11-20  2:04 ` John Ogness
2020-11-20  6:16   ` HAGIO KAZUHITO(萩尾 一仁)
2020-11-24 10:41     ` John Ogness
2020-11-24 23:51       ` HAGIO KAZUHITO(萩尾 一仁)
2020-11-25  9:07         ` John Ogness
2020-11-26  0:14           ` HAGIO KAZUHITO(萩尾 一仁)
2020-11-27  0:00             ` HAGIO KAZUHITO(萩尾 一仁)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.