From: Indu Bhagat <indu.bhagat@oracle.com>
To: linux-toolchains@vger.kernel.org
Cc: daandemeyer@meta.com, andrii@kernel.org, rostedt@goodmis.org,
kris.van.hees@oracle.com, elena.zannoni@oracle.com,
nick.alcock@oracle.com, Indu Bhagat <indu.bhagat@oracle.com>
Subject: [POC 4/5] sframe: add an SFrame format stack tracer
Date: Mon, 1 May 2023 13:04:09 -0700 [thread overview]
Message-ID: <20230501200410.3973453-5-indu.bhagat@oracle.com> (raw)
In-Reply-To: <20230501200410.3973453-1-indu.bhagat@oracle.com>
This patch adds an SFrame format based stack tracer.
The files iterate_phdr.c, iterate_phdr.h implement a dl_iterate_phdr()
like functionality.
The SFrame format based stack tracer is implemented in the
sframe_unwind.c with architecture specific bits in the
arch/arm64/include/asm/sframe_regs.h and
arch/x86/include/asm/sframe_regs.h. Please note that the SFrame format
is supported for x86_64 (AMD64 ABI) and aarch64 (AAPCS64 ABI) only at
this time.
The files sframe_state.[ch] implement the SFrame state management APIs.
Some aspects of the implementation are "POC like". These will need to be
addressed for the implementation to become more palatable:
- dealing with only Elf64_Phdr (no Elf32_Phdr) at this time, and other
TODOs in the iterate_phdr.c,
- detecting whether a program did a dlopen/dlclose,
- code stubs around user space memory access (.sframe section, ELF hdr
etc.) by the kernel need careful review.
There are more aspects than above; the intention of this patch set is to
help drive the discussion on how to best incorporate an SFrame-based user
space unwinder in the kernel.
Signed-off-by: Indu Bhagat <indu.bhagat@oracle.com>
---
arch/arm64/include/asm/sframe_regs.h | 37 +++
arch/x86/include/asm/sframe_regs.h | 34 +++
include/sframe/sframe_regs.h | 11 +
include/sframe/sframe_unwind.h | 62 ++++
lib/sframe/Makefile | 8 +-
lib/sframe/iterate_phdr.c | 113 +++++++
lib/sframe/iterate_phdr.h | 34 +++
lib/sframe/sframe_state.c | 424 +++++++++++++++++++++++++++
lib/sframe/sframe_state.h | 80 +++++
lib/sframe/sframe_unwind.c | 208 +++++++++++++
10 files changed, 1010 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/sframe_regs.h
create mode 100644 arch/x86/include/asm/sframe_regs.h
create mode 100644 include/sframe/sframe_regs.h
create mode 100644 include/sframe/sframe_unwind.h
create mode 100644 lib/sframe/iterate_phdr.c
create mode 100644 lib/sframe/iterate_phdr.h
create mode 100644 lib/sframe/sframe_state.c
create mode 100644 lib/sframe/sframe_state.h
create mode 100644 lib/sframe/sframe_unwind.c
diff --git a/arch/arm64/include/asm/sframe_regs.h b/arch/arm64/include/asm/sframe_regs.h
new file mode 100644
index 000000000000..ae9ab9d5d3c1
--- /dev/null
+++ b/arch/arm64/include/asm/sframe_regs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef ASM_ARM64_SFRAME_REGS_H
+#define ASM_ARM64_SFRAME_REGS_H
+
+/* Length, in bytes, of a single user stack access made by the unwinder. */
+#define STACK_ACCESS_LEN 8
+
+/* Indices into pt_regs::regs[] for the registers read below. */
+#define UNWIND_AARCH64_X29 29 /* 64-bit frame pointer. */
+#define UNWIND_AARCH64_X30 30 /* 64-bit link pointer. */
+
+/* Return the program counter recorded in REGS. */
+static inline uint64_t
+get_ptregs_ip(struct pt_regs *regs)
+{
+	return regs->pc;
+}
+
+/* Return the stack pointer recorded in REGS. */
+static inline uint64_t
+get_ptregs_sp(struct pt_regs *regs)
+{
+	return regs->sp;
+}
+
+/* Return the frame pointer (x29) recorded in REGS. */
+static inline uint64_t
+get_ptregs_fp(struct pt_regs *regs)
+{
+	return (uint64_t)regs->regs[UNWIND_AARCH64_X29];
+}
+
+/* Return the link register (x30) recorded in REGS. */
+static inline uint64_t
+get_ptregs_ra(struct pt_regs *regs)
+{
+	return (uint64_t)regs->regs[UNWIND_AARCH64_X30];
+}
+
+#endif /* ASM_ARM64_SFRAME_REGS_H */
diff --git a/arch/x86/include/asm/sframe_regs.h b/arch/x86/include/asm/sframe_regs.h
new file mode 100644
index 000000000000..99f84955854a
--- /dev/null
+++ b/arch/x86/include/asm/sframe_regs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef ASM_X86_SFRAME_REGS_H
+#define ASM_X86_SFRAME_REGS_H
+
+/* Length, in bytes, of a single user stack access made by the unwinder. */
+#define STACK_ACCESS_LEN 8
+
+/* Instruction pointer recorded in REGS. */
+static inline uint64_t get_ptregs_ip(struct pt_regs *regs)
+{
+	uint64_t ip = regs->ip;
+
+	return ip;
+}
+
+/* Stack pointer recorded in REGS. */
+static inline uint64_t get_ptregs_sp(struct pt_regs *regs)
+{
+	uint64_t sp = regs->sp;
+
+	return sp;
+}
+
+/* Frame pointer (bp) recorded in REGS. */
+static inline uint64_t get_ptregs_fp(struct pt_regs *regs)
+{
+	uint64_t fp = regs->bp;
+
+	return fp;
+}
+
+/* No return-address register is read from REGS here; always yields 0. */
+static inline uint64_t get_ptregs_ra(struct pt_regs *regs)
+{
+	/* SFRAME_CFA_FIXED_RA_INVALID */
+	return 0;
+}
+#endif /* ASM_X86_SFRAME_REGS_H */
diff --git a/include/sframe/sframe_regs.h b/include/sframe/sframe_regs.h
new file mode 100644
index 000000000000..32b67f7a7c78
--- /dev/null
+++ b/include/sframe/sframe_regs.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _SFRAME_REGS_H
+#define _SFRAME_REGS_H
+
+#include <asm/sframe_regs.h>
+
+#endif /* _SFRAME_REGS_H */
diff --git a/include/sframe/sframe_unwind.h b/include/sframe/sframe_unwind.h
new file mode 100644
index 000000000000..3e2c12816b60
--- /dev/null
+++ b/include/sframe/sframe_unwind.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _SFRAME_UNWIND_H
+#define _SFRAME_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+
+#define PT_GNU_SFRAME 0x6474e554
+
+/*
+ * Cursor used while walking the user space stack of a task with SFrame
+ * information.
+ */
+struct user_unwind_state {
+	/* Register values describing the frame currently being examined. */
+	uint64_t pc;
+	uint64_t sp;
+	uint64_t fp;
+	uint64_t ra;
+	/* STACK_TYPE_UNKNOWN once the walk has finished. */
+	enum stack_type stype;
+	/* Task whose stack is being walked. */
+	struct task_struct *task;
+	/* Set when the walk had to be abandoned. */
+	bool error;
+};
+
+/*
+ * APIs for an SFrame based stack tracer.
+ *
+ * sframe_unwind_start() seeds STATE from REGS for TASK and advances one
+ * frame; on failure it sets STATE->error.
+ * sframe_unwind_next_frame() advances STATE by one frame and returns false
+ * when the walk ended or failed.
+ * sframe_unwind_get_return_address() returns the pc held in STATE.
+ */
+
+void sframe_unwind_start(struct user_unwind_state *state,
+ struct task_struct *task, struct pt_regs *regs);
+bool sframe_unwind_next_frame(struct user_unwind_state *state);
+uint64_t sframe_unwind_get_return_address(struct user_unwind_state *state);
+
+/* True once the walk has been marked finished (stack type unknown). */
+static inline bool sframe_unwind_done(struct user_unwind_state *state)
+{
+	if (state->stype == STACK_TYPE_UNKNOWN)
+		return true;
+
+	return false;
+}
+
+static inline bool sframe_unwind_error(struct user_unwind_state *state)
+{
+ return state->error;
+}
+
+/*
+ * APIs to manage the SFrame state per task for stack tracing.
+ *
+ * unwind_sframe_state_alloc() returns the task's state, allocating it on
+ * first use (NULL on allocation failure).
+ * unwind_sframe_state_update() (re)builds the state; returns non-zero on
+ * failure.
+ * unwind_sframe_state_cleanup() releases the task's state.
+ */
+
+extern struct sframe_state *unwind_sframe_state_alloc(struct task_struct *task);
+extern int unwind_sframe_state_update(struct task_struct *task);
+extern void unwind_sframe_state_cleanup(struct task_struct *task);
+
+/* Predicates on the state condition; both return false for a NULL state. */
+extern bool unwind_sframe_state_valid_p(struct sframe_state *sfstate);
+extern bool unwind_sframe_state_ready_p(struct sframe_state *sfstate);
+
+/*
+ * Get the callchain using SFrame unwind info for the given task.
+ *
+ * Returns 1 when TASK is not the current task or its SFrame state is
+ * missing or not ready; otherwise returns the result of the unwind.
+ */
+extern int
+sframe_callchain_user(struct task_struct *task,
+ struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+
+#endif /* _SFRAME_UNWIND_H */
diff --git a/lib/sframe/Makefile b/lib/sframe/Makefile
index 4e4291d9294f..5ee9e3e7ec93 100644
--- a/lib/sframe/Makefile
+++ b/lib/sframe/Makefile
@@ -1,5 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
##################################
-obj-$(CONFIG_USER_UNWINDER_SFRAME) += sframe_read.o \
+obj-$(CONFIG_USER_UNWINDER_SFRAME) += iterate_phdr.o \
+ sframe_read.o \
+ sframe_state.o \
+ sframe_unwind.o
+CFLAGS_iterate_phdr.o += -I $(srctree)/lib/sframe/ -Wno-error=declaration-after-statement
CFLAGS_sframe_read.o += -I $(srctree)/lib/sframe/
+CFLAGS_sframe_state.o += -I $(srctree)/lib/sframe/
+CFLAGS_sframe_unwind.o += -I $(srctree)/lib/sframe/
diff --git a/lib/sframe/iterate_phdr.c b/lib/sframe/iterate_phdr.c
new file mode 100644
index 000000000000..c10d590ecc67
--- /dev/null
+++ b/lib/sframe/iterate_phdr.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/mm_types.h>
+
+#include "iterate_phdr.h"
+
+/*
+ * Iterate over the task's memory mappings and find the ELF headers.
+ *
+ * Every mapping that starts at file offset zero and whose first page holds a
+ * 64-bit ELF header is reported to CALLBACK through a phdr_info describing
+ * the object's program header table.  The walk stops as soon as CALLBACK
+ * returns a negative value.
+ *
+ * Returns the value of the last CALLBACK invocation, or 0 if CALLBACK was
+ * never invoked (including when TASK has no mm).
+ *
+ * This is expected to be called from perf_callchain_user(), so user process
+ * context is expected.
+ *
+ * NOTE(review): the walk and get_user_pages_remote() run without taking the
+ * mmap lock in this function - confirm what the caller guarantees.
+ */
+
+int iterate_phdr(int (*callback)(struct phdr_info *info,
+				 struct task_struct *task,
+				 void *data),
+		 struct task_struct *task, void *data)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma_mt;
+	struct phdr_info phinfo;
+	struct page *page;
+	Elf64_Ehdr *ehdr;
+	uint64_t phdrs_size;
+	bool first = true;
+	int ret, res = 0;
+
+	/* Nothing to walk when the task has no address space attached. */
+	if (!mm)
+		return 0;
+
+	memset(&phinfo, 0, sizeof(phinfo));
+
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+
+	mas_for_each(&mas, vma_mt, ULONG_MAX) {
+		/*
+		 * ELF header has a fixed place in the file, starting at offset
+		 * zero.
+		 */
+		if (vma_mt->vm_pgoff)
+			continue;
+
+		/*
+		 * For the callback to infer if it's the prog or DSO we are
+		 * dealing with.
+		 */
+		phinfo.pi_prog = first;
+		first = false;
+
+		/*
+		 * FIXME TODO KERNEL
+		 * - get_user_pages_WHAT, which API.
+		 *   What flags ? Is this correct ?
+		 */
+		ret = get_user_pages_remote(mm, vma_mt->vm_start, 1, FOLL_GET,
+					    &page, &vma_mt, NULL);
+		if (ret <= 0)
+			continue;
+
+		/* The first page must have the ELF header. */
+		ehdr = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
+		if (!ehdr)
+			goto put_page;
+
+		/* Check for magic bytes to make sure this is ehdr. */
+		if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+			goto unmap;
+
+		/*
+		 * FIXME TODO Only 64-bit ELF is handled: the header is read
+		 * as Elf64_Ehdr and the callback indexes the table as an
+		 * array of Elf64_Phdr, so skip anything else.
+		 */
+		if (ehdr->e_ident[EI_CLASS] != ELFCLASS64
+		    || ehdr->e_phentsize != sizeof(Elf64_Phdr))
+			goto unmap;
+
+		/*
+		 * FIXME TODO handle the case when number of program headers is
+		 * greater than or equal to PN_XNUM later.
+		 */
+		if (ehdr->e_phnum == PN_XNUM)
+			goto unmap;
+
+		/*
+		 * Only one page is mapped, so the whole program header table
+		 * must lie inside it.  e_phoff is read from user memory and
+		 * is used below to form pi_phdr, so it has to be bounded too.
+		 * FIXME TODO handle the case when Elf phdrs span more than one
+		 * page later ?
+		 */
+		phdrs_size = (uint64_t)ehdr->e_phentsize * ehdr->e_phnum;
+		if (ehdr->e_phoff > PAGE_SIZE
+		    || phdrs_size > PAGE_SIZE - ehdr->e_phoff)
+			goto unmap;
+
+		/* Save the location of program headers and the phnum. */
+		phinfo.pi_addr = vma_mt->vm_start;
+		phinfo.pi_phdr = (void *)ehdr + ehdr->e_phoff;
+		phinfo.pi_phnum = ehdr->e_phnum;
+
+		res = callback(&phinfo, task, data);
+unmap:
+		vunmap(ehdr);
+put_page:
+		put_page(page);
+
+		if (res < 0)
+			break;
+	}
+
+	return res;
+}
diff --git a/lib/sframe/iterate_phdr.h b/lib/sframe/iterate_phdr.h
new file mode 100644
index 000000000000..78e73cade579
--- /dev/null
+++ b/lib/sframe/iterate_phdr.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef ITERATE_PHDR_H_
+#define ITERATE_PHDR_H_
+
+#include <linux/sched.h>
+
+/* Description of one ELF object handed to the iterate_phdr() callback. */
+struct phdr_info {
+ /* Determine whether prog or DSO. */
+ bool pi_prog;
+ /* Base address. */
+ unsigned long pi_addr;
+ /* Reference to the ELF program headers of the object. */
+ void *pi_phdr;
+ /* Number of entries in the program header table. */
+ unsigned int pi_phnum;
+ /*
+ * Following two fields are for optimization - keep track of any
+ * dlopen/dlclose activity done after program startup.
+ * FIXME TODO Currently unused.
+ */
+ uint64_t pi_adds;
+ uint64_t pi_subs;
+};
+
+/*
+ * Invoke CALLBACK for the ELF objects found in TASK's mappings, passing DATA
+ * through unchanged.  A negative CALLBACK result stops the iteration.
+ */
+int iterate_phdr(int (*callback)(struct phdr_info *info,
+ struct task_struct *task,
+ void *data),
+ struct task_struct *task, void *data);
+
+#endif /* ITERATE_PHDR_H_ */
diff --git a/lib/sframe/sframe_state.c b/lib/sframe/sframe_state.c
new file mode 100644
index 000000000000..a34f762acf42
--- /dev/null
+++ b/lib/sframe/sframe_state.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <sframe/sframe_unwind.h>
+
+#include "sframe_state.h"
+#include "iterate_phdr.h"
+
+/* Number of DSO slots added each time the DSO list is (re)allocated. */
+#define NUM_OF_DSOS 32
+
+static int num_entries = NUM_OF_DSOS;
+
+/*
+ * Error codes for SFrame state.
+ *
+ * All condition codes less than SFRAME_UNW_INFO_OK are used to indicate
+ * an unhealthy SFrame state.  SFRAME_UNW_PARTIAL_INFO is not an error: some
+ * objects lack usable SFrame info but unwinding may still be attempted.
+ */
+enum {
+ SFRAME_UNW_INVAL_SFRAME = -3, /* An SFrame section is invalid. */
+ SFRAME_UNW_NO_SFSTATE = -2, /* SFrame state could not be set up. */
+ SFRAME_UNW_NO_PROG_SFRAME = -1, /* No SFrame section in prog. */
+ SFRAME_UNW_INFO_OK = 0,
+ SFRAME_UNW_PARTIAL_INFO = 1,
+};
+
+/*
+ * Release everything an SFrame unwind info object holds: the kernel mapping
+ * of the .sframe pages, the page references together with the array that
+ * tracks them, and the SFrame section object.
+ *
+ * Returns 1 if SFU_INFO is NULL, 0 otherwise.
+ */
+static int sframe_unw_info_cleanup(struct sframe_unw_info *sfu_info)
+{
+	struct page **pages;
+	int idx;
+
+	if (sfu_info == NULL)
+		return 1;
+
+	if (sfu_info->sframe_vmap != NULL) {
+		vunmap(sfu_info->sframe_vmap);
+		sfu_info->sframe_vmap = NULL;
+	}
+
+	pages = sfu_info->sframe_pages;
+	if (pages != NULL) {
+		idx = 0;
+		while (idx < sfu_info->sframe_npages) {
+			put_page(pages[idx]);
+			idx++;
+		}
+		kfree(pages);
+		sfu_info->sframe_pages = NULL;
+	}
+
+	kfree(sfu_info->sfsec);
+	sfu_info->sfsec = NULL;
+
+	return 0;
+}
+
+/*
+ * Record the address ranges of the SFrame and text segments in SFU_INFO and
+ * mark it as not yet holding any pages or mapping.  A NULL SFU_INFO is
+ * ignored.
+ */
+static void sframe_unw_info_init(struct sframe_unw_info *sfu_info,
+				 uint64_t sframe_addr, size_t sframe_size,
+				 uint64_t text_addr, size_t text_size)
+{
+	if (sfu_info == NULL)
+		return;
+
+	/* Nothing pinned or mapped yet. */
+	sfu_info->sframe_vmap = NULL;
+	sfu_info->sframe_pages = NULL;
+
+	sfu_info->text_addr = text_addr;
+	sfu_info->text_size = text_size;
+	sfu_info->sframe_addr = sframe_addr;
+	sfu_info->sframe_size = sframe_size;
+}
+
+/*
+ * Get the user pages containing the SFrame section and set up the SFrame
+ * section object for the stacktracer to use later.
+ *
+ * On success the pinned pages, their kernel mapping and the section object
+ * are stored in SFU_INFO and stay alive until sframe_unw_info_cleanup(); they
+ * must not be released here because the unwinder reads the section later.
+ * On failure everything acquired here is released, SFU_INFO->sfsec is NULL
+ * and the page/mapping fields of SFU_INFO are left untouched.
+ *
+ * Returns 0 on success and a non-zero error otherwise.
+ *
+ * NOTE(review): find_vma() is called without the mmap lock being taken in
+ * this function - confirm what the caller guarantees.
+ */
+static int sframe_unw_info_init_sfsec(struct sframe_state *sfstate,
+				      struct sframe_unw_info *sfu_info)
+{
+	struct task_struct *task = sfstate->task;
+	struct vm_area_struct *vma;
+	struct page **pages;
+	unsigned long npages, offset, span;
+	char *sframe_vmap;
+	long pinned, i;
+	int err;
+
+	sfu_info->sfsec = kmalloc(sframe_sec_sizeof(), GFP_KERNEL);
+	if (!sfu_info->sfsec)
+		return -ENOMEM;
+
+	/*
+	 * find_vma() yields the first mapping that ends above the address;
+	 * that may be no mapping at all, or one that starts beyond it.
+	 */
+	vma = find_vma(task->mm, sfu_info->sframe_addr);
+	if (!vma || vma->vm_start > sfu_info->sframe_addr) {
+		err = -EFAULT;
+		goto free_sfsec;
+	}
+
+	npages = vma_pages(vma);
+	pages = kmalloc((sizeof(struct page *) * npages), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto free_sfsec;
+	}
+
+	/*
+	 * The result is signed: keep it in a signed variable so that an
+	 * error is not mistaken for a huge page count.
+	 */
+	pinned = get_user_pages_unlocked(vma->vm_start, npages, pages,
+					 FOLL_GET);
+	if (pinned <= 0) {
+		err = pinned ? (int)pinned : -EFAULT;
+		goto free_pages;
+	}
+
+	/*
+	 * Fewer pages than requested may have been pinned, and the section
+	 * size comes from the user's ELF headers: make sure the section lies
+	 * entirely inside what is about to be mapped.
+	 */
+	span = (unsigned long)pinned * PAGE_SIZE;
+	offset = sfu_info->sframe_addr - vma->vm_start;
+	if (offset > span || sfu_info->sframe_size > span - offset) {
+		err = -EFAULT;
+		goto put_pages;
+	}
+
+	sframe_vmap = vmap(pages, pinned, VM_MAP, PAGE_KERNEL);
+	if (!sframe_vmap) {
+		err = -ENOMEM;
+		goto put_pages;
+	}
+
+	/*
+	 * In the rare possibility that this is a corrupt SFrame section,
+	 * release everything and signal the error to the caller.
+	 */
+	err = sframe_sec_init(sfu_info->sfsec, sframe_vmap + offset,
+			      sfu_info->sframe_size);
+	if (err)
+		goto unmap;
+
+	/* Publish the resources only once everything succeeded. */
+	sfu_info->sframe_pages = pages;
+	sfu_info->sframe_npages = pinned;
+	sfu_info->sframe_vmap = sframe_vmap;
+
+	return 0;
+
+unmap:
+	vunmap(sframe_vmap);
+put_pages:
+	for (i = 0; i < pinned; i++)
+		put_page(pages[i]);
+free_pages:
+	kfree(pages);
+free_sfsec:
+	kfree(sfu_info->sfsec);
+	sfu_info->sfsec = NULL;
+
+	return err;
+}
+
+/*
+ * Append a copy of SFU_INFO to the DSO list of SFSTATE, growing the list by
+ * num_entries slots whenever it is full.
+ *
+ * Returns 0 on success, -ENOMEM if the list could not be (re)allocated; in
+ * that case the existing list is left intact.
+ */
+static int sframe_state_unw_info_list_add(struct sframe_state *sfstate,
+					  struct sframe_unw_info *sfu_info)
+{
+	struct sframe_unw_info_list *list = &sfstate->su_dsos;
+	struct sframe_unw_info *grown;
+	size_t realloc_size;
+
+	if (list->alloced == 0) {
+		list->entry = kcalloc(num_entries, sizeof(*list->entry),
+				      GFP_KERNEL);
+		if (!list->entry)
+			return -ENOMEM;
+		list->alloced = num_entries;
+	} else if (list->used == list->alloced) {
+		realloc_size = (list->alloced + num_entries)
+			       * sizeof(*list->entry);
+		/*
+		 * Go through a temporary: on failure krealloc() leaves the
+		 * old array allocated, and overwriting the only pointer to
+		 * it would leak it while 'used' still counts its entries.
+		 */
+		grown = krealloc(list->entry, realloc_size, GFP_KERNEL);
+		if (!grown)
+			return -ENOMEM;
+		list->entry = grown;
+
+		/* Zero the newly added tail. */
+		memset(&list->entry[list->alloced], 0,
+		       num_entries * sizeof(*list->entry));
+		list->alloced += num_entries;
+	}
+
+	list->entry[list->used++] = *sfu_info;
+	return 0;
+}
+
+/*
+ * Build an SFrame unwind info object for the given SFrame/text ranges and
+ * store it in SFSTATE: in the program slot if that is still empty, otherwise
+ * in the DSO list.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or
+ * SFRAME_UNW_INVAL_SFRAME if the SFrame section could not be set up.
+ */
+static int sframe_state_add_unw_info(struct sframe_state *sfstate,
+				     uint64_t sframe_addr, size_t sframe_size,
+				     uint64_t text_addr, size_t text_size)
+{
+	struct sframe_unw_info *sfu_info;
+	int ret = 0;
+
+	sfu_info = kzalloc(sizeof(*sfu_info), GFP_KERNEL);
+	if (!sfu_info)
+		return -ENOMEM;
+
+	sframe_unw_info_init(sfu_info, sframe_addr, sframe_size, text_addr,
+			     text_size);
+
+	if (sframe_unw_info_init_sfsec(sfstate, sfu_info)) {
+		ret = SFRAME_UNW_INVAL_SFRAME;
+		goto end;
+	}
+
+	/* Add sframe_unw_info object for the program or its DSOs. */
+	if (!sfstate->su_prog.sframe_size) {
+		memcpy(&(sfstate->su_prog), sfu_info, sizeof(*sfu_info));
+	} else {
+		ret = sframe_state_unw_info_list_add(sfstate, sfu_info);
+		/*
+		 * Nothing took over the pages, mapping and section object
+		 * held by sfu_info, so release them here.
+		 */
+		if (ret)
+			sframe_unw_info_cleanup(sfu_info);
+	}
+
+end:
+	/* The contents were copied (or released); drop the temporary. */
+	kfree(sfu_info);
+
+	return ret;
+}
+
+/*
+ * Add SFrame unwind info for the given (prog or DSO) phdr_info into the SFrame
+ * state object in the task.
+ *
+ * Callback routine from iterate_phdr function; DATA is the sframe_state.
+ *
+ * Returns SFRAME_UNW_INFO_OK on success, SFRAME_UNW_NO_PROG_SFRAME if the
+ * program itself has no usable SFrame info, and SFRAME_UNW_PARTIAL_INFO if a
+ * DSO has none or if adding the info failed.
+ */
+static int add_sframe_unwind_info(struct phdr_info *info,
+				  struct task_struct *task,
+				  void *data)
+{
+	struct sframe_state *sframe_state = data;
+	/* FIXME TODO what if it's Elf32_Phdr. */
+	Elf64_Phdr *phdr = info->pi_phdr;
+	Elf64_Phdr *sframe_phdr = NULL, *text_phdr = NULL;
+	uint64_t text_addr, sframe_addr;
+	size_t text_size, sframe_size;
+	unsigned int j;
+	int p_type;
+	int err;
+
+	for (j = 0; j < info->pi_phnum; j++) {
+		p_type = phdr[j].p_type;
+		/* Find the executable section and the SFrame section. */
+		if (p_type == PT_GNU_SFRAME) {
+			sframe_phdr = &phdr[j];
+			continue;
+		}
+		/* FIXME TODO Elf 101 - there be only one PF_X. Looks like it? */
+		if (p_type == PT_LOAD && phdr[j].p_flags & PF_X) {
+			/*
+			 * This is the executable part of the ELF binary
+			 * containing the instructions, and may contain
+			 * sections other than .text. The usage of `text` in
+			 * this function is colloquial.
+			 */
+			text_phdr = &phdr[j];
+			continue;
+		}
+
+		if (sframe_phdr && text_phdr)
+			break;
+	}
+
+	/*
+	 * If there is no SFrame section for the prog, SFrame based unwinding
+	 * should not be attempted. If no SFrame section is found for a DSO,
+	 * however, it may still be possible to generate useful stacktraces
+	 * using the SFrame sections for other parts of the program.
+	 *
+	 * An SFrame section without an executable segment to associate it
+	 * with is just as unusable, and text_phdr is dereferenced below.
+	 */
+	if (!sframe_phdr || !text_phdr)
+		return info->pi_prog
+			? SFRAME_UNW_NO_PROG_SFRAME
+			: SFRAME_UNW_PARTIAL_INFO;
+
+	text_addr = info->pi_prog ? text_phdr->p_vaddr
+				  : info->pi_addr + text_phdr->p_vaddr;
+	text_size = text_phdr->p_memsz;
+	sframe_addr = info->pi_prog ? sframe_phdr->p_vaddr
+				    : info->pi_addr + sframe_phdr->p_vaddr;
+	sframe_size = sframe_phdr->p_memsz;
+
+	/* Add the SFrame unwind info object to the list. */
+	err = sframe_state_add_unw_info(sframe_state, sframe_addr,
+					sframe_size, text_addr, text_size);
+	/*
+	 * An error indicates SFrame unwind info addition failed, but one can
+	 * still unwind using .sframe for other parts of the program.
+	 */
+	if (err)
+		return SFRAME_UNW_PARTIAL_INFO;
+
+	return SFRAME_UNW_INFO_OK;
+}
+
+/*
+ * Collect SFrame unwind info for every ELF object of TASK into the task's
+ * (already allocated) SFrame state.
+ *
+ * Returns SFRAME_UNW_NO_SFSTATE if the task has no state, otherwise the
+ * result of iterate_phdr().
+ */
+static int add_sframe_unwind_info_for_task(struct task_struct *task)
+{
+	if (task->sframe_state == NULL)
+		return SFRAME_UNW_NO_SFSTATE;
+
+	return iterate_phdr(add_sframe_unwind_info, task, task->sframe_state);
+}
+
+/*
+ * SFrame Unwind Info APIs.
+ */
+
+/*
+ * True if ADDR lies in the text range [text_addr, text_addr + text_size) of
+ * SFU_INFO.  Written as a subtraction so the range end cannot wrap around.
+ */
+static bool sframe_unw_info_covers(const struct sframe_unw_info *sfu_info,
+				   uint64_t addr)
+{
+	return addr >= sfu_info->text_addr
+	       && addr - sfu_info->text_addr < sfu_info->text_size;
+}
+
+/*
+ * Find the SFrame unwind info object whose text range contains ADDR.
+ *
+ * The program's object is checked first, then the DSO list in order.  The
+ * same half-open range test is applied to both.
+ *
+ * Returns a pointer into SFSTATE, or NULL if SFSTATE is NULL or no text
+ * range contains ADDR.
+ */
+struct sframe_unw_info *sframe_state_find_unw_info(struct sframe_state *sfstate,
+						   uint64_t addr)
+{
+	struct sframe_unw_info_list *unw_info_list;
+	int i;
+
+	if (!sfstate)
+		return NULL;
+
+	if (sframe_unw_info_covers(&sfstate->su_prog, addr))
+		return &sfstate->su_prog;
+
+	unw_info_list = &sfstate->su_dsos;
+	for (i = 0; i < unw_info_list->used; ++i) {
+		if (sframe_unw_info_covers(&unw_info_list->entry[i], addr))
+			return &unw_info_list->entry[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the SFrame section object attached to SFU_INFO, or NULL when
+ * SFU_INFO is NULL or has none.
+ */
+struct sframe_sec *sframe_unw_info_get_sfsec(struct sframe_unw_info *sfu_info)
+{
+	return sfu_info ? sfu_info->sfsec : NULL;
+}
+
+/*
+ * SFrame state APIs.
+ */
+
+/*
+ * Release the unwind info held by SFSTATE (program and all DSOs), free the
+ * DSO list and detach the state from its task.  The state object itself is
+ * not freed.  A NULL SFSTATE is ignored.
+ */
+static void unwind_sframe_state_free(struct sframe_state *sfstate)
+{
+	struct sframe_unw_info_list *dsos;
+	int idx;
+
+	if (sfstate == NULL)
+		return;
+
+	sframe_unw_info_cleanup(&sfstate->su_prog);
+
+	dsos = &sfstate->su_dsos;
+	for (idx = 0; idx < dsos->used; idx++)
+		sframe_unw_info_cleanup(&dsos->entry[idx]);
+
+	kfree(dsos->entry);
+	dsos->entry = NULL;
+	dsos->alloced = 0;
+	dsos->used = 0;
+
+	sfstate->task = NULL;
+}
+
+/* True if SFSTATE exists and is not marked SFSTATE_INVAL. */
+bool unwind_sframe_state_valid_p(struct sframe_state *sfstate)
+{
+	if (sfstate == NULL)
+		return false;
+
+	return sfstate->cond != SFSTATE_INVAL;
+}
+
+/* True if SFSTATE exists and is marked SFSTATE_READY. */
+bool unwind_sframe_state_ready_p(struct sframe_state *sfstate)
+{
+	if (sfstate == NULL)
+		return false;
+
+	return sfstate->cond == SFSTATE_READY;
+}
+
+/*
+ * Return the SFrame state of TASK, allocating a zeroed one in the
+ * SFSTATE_ALLOCED condition if the task has none yet.  The state's task
+ * back-pointer is (re)set on every call.
+ *
+ * Returns NULL if the allocation fails.
+ */
+struct sframe_state *unwind_sframe_state_alloc(struct task_struct *task)
+{
+	struct sframe_state *sfstate = task->sframe_state;
+
+	if (sfstate == NULL) {
+		/* Per the original note: freed up in release_task(). */
+		sfstate = kzalloc(sizeof(*sfstate), GFP_KERNEL);
+		if (sfstate == NULL)
+			return NULL;
+
+		sfstate->cond = SFSTATE_ALLOCED;
+		task->sframe_state = sfstate;
+	}
+
+	sfstate->task = task;
+	return sfstate;
+}
+
+/*
+ * Release the SFrame state of TASK, if any, and clear the task's pointer
+ * to it.
+ */
+void unwind_sframe_state_cleanup(struct task_struct *task)
+{
+	struct sframe_state *sfstate = task->sframe_state;
+
+	if (sfstate == NULL)
+		return;
+
+	unwind_sframe_state_free(sfstate);
+	kfree(sfstate);
+	task->sframe_state = NULL;
+}
+
+/*
+ * Update the SFrame unwind state object cached per task.
+ *
+ * The unwind info is (re)collected only when the state is SFSTATE_ALLOCED or
+ * SFSTATE_STALE; then:
+ * Sets cond to SFSTATE_INVAL if any error.
+ * Sets cond to SFSTATE_READY if no error.
+ * A state that is already SFSTATE_READY or SFSTATE_INVAL is left unchanged.
+ *
+ * Returns 0 if the state is ready for use afterwards, non-zero otherwise
+ * (including when the task has no SFrame state at all).
+ */
+int unwind_sframe_state_update(struct task_struct *task)
+{
+	struct sframe_state *sfstate = task->sframe_state;
+	int sferr;
+
+	/* The state is allocated by a separate call; it may not exist. */
+	if (!sfstate)
+		return 1;
+
+	/*
+	 * Nothing to collect: report the current condition rather than
+	 * promoting a previously invalidated state to ready.
+	 */
+	if (sfstate->cond != SFSTATE_ALLOCED && sfstate->cond != SFSTATE_STALE)
+		return sfstate->cond != SFSTATE_READY;
+
+	sferr = add_sframe_unwind_info_for_task(task);
+
+	sfstate->cond = (sferr < SFRAME_UNW_INFO_OK) ? SFSTATE_INVAL
+						     : SFSTATE_READY;
+
+	return sferr < SFRAME_UNW_INFO_OK;
+}
diff --git a/lib/sframe/sframe_state.h b/lib/sframe/sframe_state.h
new file mode 100644
index 000000000000..e2f0251b30d5
--- /dev/null
+++ b/lib/sframe/sframe_state.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef SFRAME_STATE_H
+#define SFRAME_STATE_H
+
+#include "sframe_read.h"
+
+/*
+ * SFrame unwind info of the program or a DSO.
+ */
+struct sframe_unw_info {
+ /* SFrame segment's virtual addr. */
+ uint64_t sframe_addr;
+ /* SFrame segment's size. */
+ uint64_t sframe_size;
+ /*
+ * Pages backing the .sframe section and their count. A reference is
+ * held on each page for the lifetime of this SFrame unwind info
+ * object.
+ */
+ struct page **sframe_pages;
+ unsigned long sframe_npages;
+ /* Address of the vmap'd area that contains the .sframe section. */
+ const char *sframe_vmap;
+ /*
+ * text_addr and text_size below are used only for looking up the
+ * associated SFrame section. See sframe_state_find_unw_info.
+ */
+ /* Text segment's virtual addr. */
+ uint64_t text_addr;
+ /* Text segment's size. */
+ uint64_t text_size;
+ /* SFrame section contents. */
+ struct sframe_sec *sfsec;
+};
+
+/*
+ * List of SFrame unwind info objects.
+ * Typically used to represent SFrame unwind info for the set of shared
+ * libraries of a program.
+ */
+struct sframe_unw_info_list {
+ /* Entries allocated. */
+ int alloced;
+ /* Entries used. */
+ int used;
+ /* Array of SFrame unwind info objects; NULL until first allocated. */
+ struct sframe_unw_info *entry;
+};
+
+/* Condition of a task's SFrame unwind state. */
+enum sframe_state_code {
+ SFSTATE_READY = 0, /* SFrame unwind OK to use. */
+ SFSTATE_INVAL = 1, /* SFrame unwind is invalid. */
+ SFSTATE_ALLOCED = 2, /* SFrame unwind is alloc'd but not initialized. */
+ SFSTATE_STALE = 3, /* SFrame unwind is stale and not OK to use. */
+};
+
+/*
+ * Per task SFrame unwind state.
+ */
+struct sframe_state {
+ /* The task that this SFrame unwind info belongs to. */
+ struct task_struct *task;
+ /* Current SFrame state condition. */
+ enum sframe_state_code cond;
+ /* SFrame stack trace info for program. */
+ struct sframe_unw_info su_prog;
+ /* SFrame stack trace info for the shared objects. */
+ struct sframe_unw_info_list su_dsos;
+};
+
+/* Look up the unwind info whose text range contains ADDR; NULL if none. */
+extern struct sframe_unw_info *
+sframe_state_find_unw_info(struct sframe_state *sfstate, uint64_t addr);
+
+/* Return the SFrame section object of SFU_INFO; NULL if there is none. */
+extern struct sframe_sec *
+sframe_unw_info_get_sfsec(struct sframe_unw_info *sfu_info);
+
+#endif /* SFRAME_STATE_H. */
diff --git a/lib/sframe/sframe_unwind.c b/lib/sframe/sframe_unwind.c
new file mode 100644
index 000000000000..32716008d0b4
--- /dev/null
+++ b/lib/sframe/sframe_unwind.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/perf_event.h>
+#include <sframe/sframe_unwind.h>
+#include <sframe/sframe_regs.h>
+
+#include "sframe_state.h"
+
+/*
+ * Check if ADDR is in a text segment for which a corresponding SFrame
+ * section was also found.  The address is considered invalid if it is not in
+ * any of the address ranges of the text segments of either the main program
+ * or any of its DSOs (for which a corresponding SFrame section existed).
+ * Return true if valid, false otherwise.
+ */
+static bool unwind_sframe_ip_ok(struct sframe_state *sfstate, uint64_t addr)
+{
+	struct sframe_unw_info *match;
+
+	match = sframe_state_find_unw_info(sfstate, addr);
+
+	return match != NULL;
+}
+
+/*
+ * Begin a stack walk for TASK: seed the unwind state from REGS and step to
+ * the next frame.  If the task's SFrame state is missing or not ready, or
+ * REGS is NULL, only the error flag of the unwind state is set.
+ */
+void sframe_unwind_start(struct user_unwind_state *user_unw_state,
+			 struct task_struct *task, struct pt_regs *regs)
+{
+	bool usable = task->sframe_state
+		      && unwind_sframe_state_ready_p(task->sframe_state);
+
+	if (!usable || !regs) {
+		user_unw_state->error = true;
+		return;
+	}
+
+	user_unw_state->task = task;
+	user_unw_state->stype = STACK_TYPE_TASK;
+
+	user_unw_state->pc = get_ptregs_ip(regs);
+	user_unw_state->sp = get_ptregs_sp(regs);
+	user_unw_state->fp = get_ptregs_fp(regs);
+
+	/* We need to skip ahead by one. */
+	sframe_unwind_next_frame(user_unw_state);
+}
+
+/*
+ * Advance USTATE by one frame using the SFrame info covering its pc.
+ *
+ * The frame row entry (FRE) for the pc yields the CFA, the stack slot of the
+ * return address and, optionally, the stack slot of the saved frame pointer;
+ * both slots are read from user memory.
+ *
+ * Returns true if USTATE now describes the caller's frame.  Returns false
+ * and marks the walk as done when no SFrame info covers the pc, or returns
+ * false and sets USTATE->error on any other failure.
+ */
+bool sframe_unwind_next_frame(struct user_unwind_state *ustate)
+{
+	struct sframe_unw_info *sfu_info;
+	struct sframe_sec *sfsec;
+	struct task_struct *task;
+	struct sframe_state *sfstate;
+
+	int32_t ra_offset, rfp_offset, cfa_offset;
+
+	uint64_t cfa;
+	uint64_t return_addr;
+	uint64_t rfp_stack_loc;
+	uint64_t ra_stack_loc;
+	uint64_t sframe_vma;
+
+	uint64_t pc = ustate->pc;
+	uint64_t sp = ustate->sp;
+	uint64_t fp = ustate->fp;
+
+	struct sframe_fre fre, *frep = &fre;
+	int err = 0;
+
+	task = ustate->task;
+	sfstate = task->sframe_state;
+	/*
+	 * Indicate end of stack trace when SFrame unwind info
+	 * is not found for the given PC.
+	 */
+	sfu_info = sframe_state_find_unw_info(sfstate, pc);
+	if (!sfu_info)
+		goto the_end;
+
+	/* The lookup helper returns NULL when no section object is set. */
+	sfsec = sframe_unw_info_get_sfsec(sfu_info);
+	if (!sfsec)
+		goto error;
+
+	/* Find the SFrame FRE; the lookup takes the pc relative to .sframe. */
+	sframe_vma = sfu_info->sframe_addr;
+	pc -= sframe_vma;
+	err = sframe_sec_find_fre(sfsec, pc, frep);
+	if (err != 0)
+		goto error;
+
+	/* Get the CFA offset from the FRE. */
+	cfa_offset = sframe_fre_get_cfa_offset(sfsec, frep, &err);
+	if (err != 0)
+		goto error;
+
+	/* The CFA is based on either SP or FP, as the FRE says. */
+	cfa = ((sframe_fre_get_base_reg_id(frep, &err)
+		== SFRAME_BASE_REG_SP) ? sp : fp) + cfa_offset;
+	if (err != 0)
+		goto error;
+
+	/* Get the RA offset from the FRE. */
+	ra_offset = sframe_fre_get_ra_offset(sfsec, frep, &err);
+	if (err != 0)
+		goto error;
+	ra_stack_loc = cfa + ra_offset;
+
+	if (!access_ok((const uint64_t __user *)ra_stack_loc, STACK_ACCESS_LEN))
+		goto error;
+
+	if (__get_user(return_addr, (const uint64_t __user *)ra_stack_loc))
+		goto error;
+
+	/*
+	 * Get the FP offset from the FRE.  When the FRE does not track FP,
+	 * the current value is carried over unchanged.
+	 */
+	rfp_offset = sframe_fre_get_fp_offset(sfsec, frep, &err);
+	if (err == 0) {
+		rfp_stack_loc = cfa + rfp_offset;
+
+		if (!access_ok((const uint64_t __user *) rfp_stack_loc,
+			       STACK_ACCESS_LEN))
+			goto error;
+
+		if (__get_user(fp, (const uint64_t __user *)rfp_stack_loc))
+			goto error;
+	}
+
+	/* Only accept a return address that SFrame info covers. */
+	if (!unwind_sframe_ip_ok(sfstate, return_addr))
+		goto error;
+
+	ustate->pc = return_addr;
+	ustate->sp = cfa;
+	ustate->fp = fp;
+
+	return true;
+
+error:
+	ustate->error = true;
+	return false;
+the_end:
+	ustate->stype = STACK_TYPE_UNKNOWN;
+	return false;
+}
+
+/* Return the pc currently held in STATE. */
+uint64_t sframe_unwind_get_return_address(struct user_unwind_state *state)
+{
+	uint64_t addr = state->pc;
+
+	return addr;
+}
+
+/*
+ * Generate stack trace using SFrame stack trace information.
+ *
+ * Walks the frames of TASK starting from REGS and stores each return address
+ * in ENTRY until the walk is done, fails, yields a zero address, or ENTRY
+ * refuses another address.  Always returns 0.
+ */
+
+static int do_sframe_unwind(struct task_struct *task,
+			    struct sframe_state *sfstate,
+			    struct perf_callchain_entry_ctx *entry,
+			    struct pt_regs *regs)
+{
+	struct user_unwind_state ustate;
+	uint64_t addr;
+
+	memset(&ustate, 0, sizeof(ustate));
+
+	sframe_unwind_start(&ustate, task, regs);
+	while (!sframe_unwind_done(&ustate) && !sframe_unwind_error(&ustate)) {
+		addr = sframe_unwind_get_return_address(&ustate);
+		if (!addr)
+			break;
+		if (perf_callchain_store(entry, addr))
+			break;
+
+		sframe_unwind_next_frame(&ustate);
+	}
+
+	return 0;
+}
+
+/*
+ * Get the stack trace for the task using SFrame stack trace information.
+ *
+ * Returns 1 if TASK is not the current task or if its SFrame state is
+ * missing or not ready; otherwise returns the result of do_sframe_unwind().
+ */
+
+int sframe_callchain_user(struct task_struct *task,
+			  struct perf_callchain_entry_ctx *entry,
+			  struct pt_regs *regs)
+{
+	struct sframe_state *sfstate;
+
+	/* Only the current task can be unwound. */
+	if (task != current)
+		return 1;
+
+	sfstate = task->sframe_state;
+	if (sfstate == NULL)
+		return 1;
+
+	/*
+	 * Prepare for stack tracing. State must be SFSTATE_READY at this time.
+	 * FIXME TODO SFrame state may be stale because there was a change in
+	 * the set of DSOs used by the program, for example.
+	 * FIXME TODO Need to update task->sframe_state if program
+	 * dlopen/dlclose a DSO.
+	 */
+	if (!unwind_sframe_state_ready_p(sfstate))
+		return 1;
+
+	return do_sframe_unwind(task, sfstate, entry, regs);
+}
--
2.39.2
next prev parent reply other threads:[~2023-05-01 20:05 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-01 20:04 [POC 0/5] SFrame based stack tracer for user space in the kernel Indu Bhagat
2023-05-01 20:04 ` [POC 1/5] Kconfig: x86: Add new config options for userspace unwinder Indu Bhagat
2023-05-01 20:04 ` [POC 2/5] task_struct : add additional member for sframe state Indu Bhagat
2023-05-01 20:04 ` [POC 3/5] sframe: add new SFrame library Indu Bhagat
2023-05-01 22:40 ` Steven Rostedt
2023-05-02 5:07 ` Indu Bhagat
2023-05-02 8:46 ` Peter Zijlstra
2023-05-02 9:09 ` Peter Zijlstra
2023-05-02 9:20 ` Peter Zijlstra
2023-05-02 9:28 ` Peter Zijlstra
2023-05-02 9:30 ` Peter Zijlstra
2023-05-03 6:03 ` Indu Bhagat
2023-05-02 10:31 ` Peter Zijlstra
2023-05-02 10:41 ` Peter Zijlstra
2023-05-02 15:22 ` Steven Rostedt
2023-05-01 20:04 ` Indu Bhagat [this message]
2023-05-01 23:00 ` [POC 4/5] sframe: add an SFrame format stack tracer Steven Rostedt
2023-05-02 6:16 ` Indu Bhagat
2023-05-02 8:53 ` Peter Zijlstra
2023-05-02 9:04 ` Peter Zijlstra
2023-05-01 20:04 ` [POC 5/5] x86_64: invoke SFrame based stack tracer for user space Indu Bhagat
2023-05-01 23:11 ` Steven Rostedt
2023-05-02 10:53 ` Peter Zijlstra
2023-05-02 15:27 ` Steven Rostedt
2023-05-16 17:25 ` Andrii Nakryiko
2023-05-16 17:38 ` Steven Rostedt
2023-05-16 17:51 ` Andrii Nakryiko
2024-03-13 14:37 ` Tatsuyuki Ishi
2024-03-13 14:52 ` Steven Rostedt
2024-03-13 14:58 ` Tatsuyuki Ishi
2024-03-13 15:04 ` Steven Rostedt
2023-05-01 22:15 ` [POC 0/5] SFrame based stack tracer for user space in the kernel Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230501200410.3973453-5-indu.bhagat@oracle.com \
--to=indu.bhagat@oracle.com \
--cc=andrii@kernel.org \
--cc=daandemeyer@meta.com \
--cc=elena.zannoni@oracle.com \
--cc=kris.van.hees@oracle.com \
--cc=linux-toolchains@vger.kernel.org \
--cc=nick.alcock@oracle.com \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).