On 2011-09-30 01:46, Richard Henderson wrote: > This allows us to generate unwind info for the dynamicly generated > code in the code_gen_buffer. Only i386 is converted at this point. > > Signed-off-by: Richard Henderson > --- > elf.h | 1 + > exec.c | 9 +++ > tcg/i386/tcg-target.c | 125 +++++++++++++++++++++++++++++++--- > tcg/tcg.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++ > tcg/tcg.h | 2 + > 5 files changed, 313 insertions(+), 9 deletions(-) > > > It's somewhat unfortunate, but the GDB interface requires that we > build a contiguous ELF image. This means we can't place the ELF > header that we generate any place except the beginning of the > code_gen_buffer. While tedious, this isn't terribly difficult. > > With this patch, I now get: > > Breakpoint 1, __ldb_mmu (addr=1001716, mmu_idx=0) > at /home/rth/work/qemu/qemu/softmmu_template.h:86 > 86 { > (gdb) where > #0 __ldb_mmu (addr=1001716, mmu_idx=0) > at /home/rth/work/qemu/qemu/softmmu_template.h:86 > #1 0x0000000040000afc in ?? () > #2 0x000000000053e85c in cpu_x86_exec (env=0x1208d80) > at /home/rth/work/qemu/qemu/cpu-exec.c:565 > #3 0x0000000000542932 in tcg_cpu_exec (env=0x1208d80) > at /home/rth/work/qemu/qemu/cpus.c:913 > #4 cpu_exec_all () at /home/rth/work/qemu/qemu/cpus.c:949 > #5 0x0000000000542ad5 in qemu_tcg_cpu_thread_fn (arg=) > at /home/rth/work/qemu/qemu/cpus.c:688 > #6 0x00000033f1a07b31 in start_thread () from /lib64/libpthread.so.0 > #7 0x00000033f16dfd2d in clone () from /lib64/libc.so.6 > > which is nicely correct. Cool. Would it be possible to tag translated code as well? At TB or maybe even input instruction level? Only in debugging mode of course. > > Comments from the gdb folk are welcome. 
> > > r~ > > > > diff --git a/elf.h b/elf.h > index 2e05d34..1e56a8c 100644 > --- a/elf.h > +++ b/elf.h > @@ -216,6 +216,7 @@ typedef int64_t Elf64_Sxword; > > #define ELF_ST_BIND(x) ((x) >> 4) > #define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) > +#define ELF_ST_INFO(bind,type) (((bind) << 4) | (type)) > #define ELF32_ST_BIND(x) ELF_ST_BIND(x) > #define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) > #define ELF64_ST_BIND(x) ELF_ST_BIND(x) > diff --git a/exec.c b/exec.c > index 1e6f732..f6c07d5 100644 > --- a/exec.c > +++ b/exec.c > @@ -464,6 +464,8 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] > > static void code_gen_alloc(unsigned long tb_size) > { > + size_t grab; > + > #ifdef USE_STATIC_CODE_GEN_BUFFER > code_gen_buffer = static_code_gen_buffer; > code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; > @@ -558,6 +560,13 @@ static void code_gen_alloc(unsigned long tb_size) > map_exec(code_gen_buffer, code_gen_buffer_size); > #endif > #endif /* !USE_STATIC_CODE_GEN_BUFFER */ > + > + /* Give GDB unwind info for the code we generate. This grabs a small > + amount of space from the front of the buffer. Account for it. */ > + grab = tcg_register_jit(code_gen_buffer, code_gen_buffer_size); > + code_gen_buffer += grab; > + code_gen_buffer_size -= grab; > + > map_exec(code_gen_prologue, sizeof(code_gen_prologue)); > code_gen_buffer_max_size = code_gen_buffer_size - > (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index 281f87d..462f455 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -1913,22 +1913,29 @@ static int tcg_target_callee_save_regs[] = { > #endif > }; > > +/* Compute frame size via macros, to share between tcg_target_qemu_prologue > + and tcg_register_jit. 
*/ > + > +#define PUSH_SIZE \ > + ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ > + * (TCG_TARGET_REG_BITS / 8)) > + > +#define FRAME_SIZE \ > + ((PUSH_SIZE \ > + + TCG_STATIC_CALL_ARGS_SIZE \ > + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ > + + TCG_TARGET_STACK_ALIGN - 1) \ > + & ~(TCG_TARGET_STACK_ALIGN - 1)) > + > /* Generate global QEMU prologue and epilogue code */ > static void tcg_target_qemu_prologue(TCGContext *s) > { > - int i, frame_size, push_size, stack_addend; > + int i, stack_addend; > > /* TB prologue */ > > /* Reserve some stack space, also for TCG temps. */ > - push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs); > - push_size *= TCG_TARGET_REG_BITS / 8; > - > - frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE + > - CPU_TEMP_BUF_NLONGS * sizeof(long); > - frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & > - ~(TCG_TARGET_STACK_ALIGN - 1); > - stack_addend = frame_size - push_size; > + stack_addend = FRAME_SIZE - PUSH_SIZE; > tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, > CPU_TEMP_BUF_NLONGS * sizeof(long)); > > @@ -1988,3 +1995,103 @@ static void tcg_target_init(TCGContext *s) > > tcg_add_target_add_op_defs(x86_op_defs); > } > + > +extern char debug_frame[]; > +extern char debug_frame_end[]; > +extern void *debug_frame_code_start; > +extern size_t debug_frame_code_len; > +extern char debug_frame_frame_size[]; > + > +#if TCG_TARGET_REG_BITS == 64 > +#define ELF_HOST_MACHINE EM_X86_64 > +asm(".data\n" > +" .align 8\n" > +"debug_frame:\n" > +" .long 99f-0f\n" /* Length of CIE */ > +"0: .long -1\n" /* CIE identifier */ > +" .byte 1\n" /* CIE version */ > +" .byte 0\n" /* CIE augmentation (none) */ > +" .byte 1\n" /* CIE code alignment factor */ > +" .byte 0x78\n" /* CIE data alignment factor (sleb128 -8) */ > +" .byte 16\n" /* CIE return address column */ > +" .align 8\n" > +"99:\n" /* End of CIE */ > +" .long 99f-0f\n" /* Length of FDE */ > +"0: .long 0\n" /* FDE CIE offset (start of section) */ > 
+"debug_frame_code_start:\n" > +" .quad 0\n" /* FDE start (to be filled in) */ > +"debug_frame_code_len:\n" > +" .quad 0\n" /* FDE length (to be filled in) */ > +" .byte 12\n" /* DW_CFA_def_cfa */ > +" .byte 7\n" /* %rsp */ > +"debug_frame_frame_size:\n" > +" .byte 0, 0\n" /* frame_size (to be filled in) */ > +" .byte 0x90, 1\n" /* DW_CFA_offset, %rip, -8 */ > +/* The following ordering must match tcg_target_callee_save_regs. */ > +" .byte 0x86, 2\n" /* DW_CFA_offset, %rbp, -16 */ > +" .byte 0x83, 3\n" /* DW_CFA_offset, %rbx, -24 */ > +" .byte 0x8c, 4\n" /* DW_CFA_offset, %r12, -32 */ > +" .byte 0x8d, 5\n" /* DW_CFA_offset, %r13, -40 */ > +" .byte 0x8e, 6\n" /* DW_CFA_offset, %r14, -48 */ > +" .byte 0x8f, 7\n" /* DW_CFA_offset, %r15, -56 */ > +" .align 8\n" > +"99:\n" /* End of FDE */ > +"debug_frame_end:\n" > +".previous" > +); > +#else > +#define ELF_HOST_MACHINE EM_386 > +asm(".data\n" > +" .align 4\n" > +"debug_frame:\n" > +" .long 99f-0f\n" /* Length of CIE */ > +"0: .long -1\n" /* CIE identifier */ > +" .byte 1\n" /* CIE version */ > +" .byte 0\n" /* CIE augmentation (none) */ > +" .byte 1\n" /* CIE code alignment factor */ > +" .byte 0x7c\n" /* CIE data alignment factor (sleb128 -4) */ > +" .byte 8\n" /* CIE return address column */ > +" .align 4\n" > +"99:\n" /* End of CIE */ > +" .long 99f-0f\n" /* Length of FDE */ > +"0: .long 0\n" /* FDE CIE offset (start of section) */ > +"debug_frame_code_start:\n" > +" .long 0\n" /* FDE start (to be filled in) */ > +"debug_frame_code_len:\n" > +" .long 0\n" /* FDE length (to be filled in) */ > +" .byte 12\n" /* DW_CFA_def_cfa */ > +" .byte 4\n" /* %rsp */ > +"debug_frame_frame_size:\n" > +" .byte 0, 0\n" /* frame_size (to be filled in) */ > +" .byte 0x88, 1\n" /* DW_CFA_offset, %eip, -4 */ > +/* The following ordering must match tcg_target_callee_save_regs. 
*/ > +" .byte 0x85, 2\n" /* DW_CFA_offset, %ebp, -8 */ > +" .byte 0x83, 3\n" /* DW_CFA_offset, %ebx, -12 */ > +" .byte 0x86, 4\n" /* DW_CFA_offset, %esi, -16 */ > +" .byte 0x87, 5\n" /* DW_CFA_offset, %edi, -20 */ > +" .align 4\n" > +"99:\n" /* End of FDE */ > +"debug_frame_end:\n" > +".previous" > +); > +#endif > + > +size_t tcg_register_jit(void *buf, size_t buf_size) > +{ > + const unsigned int frame_size = FRAME_SIZE; > + unsigned int f_lo, f_hi; > + > + /* ??? These could be filled in generically via reading the debug data. */ > + debug_frame_code_start = buf; > + debug_frame_code_len = buf_size; > + > + /* ??? We're expecting a 2 byte uleb128 encoded value. */ > + f_lo = (frame_size & 0x7f) | 0x80; > + f_hi = frame_size >> 7; > + assert(frame_size >> 14 == 0); > + debug_frame_frame_size[0] = f_lo; > + debug_frame_frame_size[1] = f_hi; > + > + return tcg_register_jit_int(buf, buf_size, debug_frame, > + debug_frame_end - debug_frame); > +} > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 411f971..63d81f9 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -57,6 +57,18 @@ > #include "cpu.h" > > #include "tcg-op.h" > + > +#if TCG_TARGET_REG_BITS == 64 > +# define ELF_CLASS ELFCLASS64 > +#else > +# define ELF_CLASS ELFCLASS32 > +#endif > +#ifdef HOST_WORDS_BIGENDIAN > +# define ELF_DATA ELFDATA2MSB > +#else > +# define ELF_DATA ELFDATA2LSB > +#endif > + > #include "elf.h" > > #if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE) > @@ -68,6 +80,9 @@ static void tcg_target_qemu_prologue(TCGContext *s); > static void patch_reloc(uint8_t *code_ptr, int type, > tcg_target_long value, tcg_target_long addend); > > +static size_t tcg_register_jit_int(void *buf, size_t size, > + void *debug_frame, size_t debug_frame_size); > + > TCGOpDef tcg_op_defs[] = { > #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, > #include "tcg-opc.h" > @@ -2233,3 +2248,173 @@ void tcg_dump_info(FILE *f, fprintf_function 
cpu_fprintf) > cpu_fprintf(f, "[TCG profiler not compiled]\n"); > } > #endif > + > +#ifdef ELF_HOST_MACHINE > +/* The backend should define ELF_HOST_MACHINE to indicate both what value to > + put into the ELF image and to indicate support for the feature. */ > + > +/* Begin GDB interface. The following must match the docs. */ > +typedef enum > +{ > + JIT_NOACTION = 0, > + JIT_REGISTER_FN, > + JIT_UNREGISTER_FN > +} jit_actions_t; > + > +struct jit_code_entry { > + struct jit_code_entry *next_entry; > + struct jit_code_entry *prev_entry; > + const char *symfile_addr; > + uint64_t symfile_size; > +}; > + > +struct jit_descriptor { > + uint32_t version; > + uint32_t action_flag; > + struct jit_code_entry *relevant_entry; > + struct jit_code_entry *first_entry; > +}; > + > +void __jit_debug_register_code(void); > +void __attribute__((noinline)) __jit_debug_register_code(void) Identifiers with a leading '_' are reserved for the system and taboo for the application (we have some other violations, yes). > +{ > + asm(""); > +} > + > +/* Must statically initialize the version, because GDB may check > + the version before we can set it. */ > +struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; > + > +/* End GDB interface. */ > + > +static int find_string(const char *strtab, const char *str) > +{ > + const char *p = strtab + 1; > + > + while (1) { > + if (strcmp(p, str) == 0) { > + return p - strtab; > + } > + p += strlen(p) + 1; > + } > +} > + > +static size_t tcg_register_jit_int(void *buf, size_t buf_size, > + void *debug_frame, size_t debug_frame_size) > +{ > + static const char strings[64] = > + "\0" > + ".text\0" > + ".debug_frame\0" > + ".symtab\0" > + ".strtab\0" > + "code_gen_buffer"; > + > + struct ElfImage { > + ElfW(Ehdr) ehdr; > + ElfW(Phdr) phdr; > + ElfW(Shdr) shdr[5]; > + ElfW(Sym) sym[1]; > + char str[64]; > + }; > + > + /* We only need a single jit entry; statically allocate it. 
*/ > + static struct jit_code_entry one_entry; > + > + struct ElfImage *img = buf; > + size_t grab = sizeof(*img) + debug_frame_size; > + > + img->ehdr.e_ident[EI_MAG0] = ELFMAG0; > + img->ehdr.e_ident[EI_MAG1] = ELFMAG1; > + img->ehdr.e_ident[EI_MAG2] = ELFMAG2; > + img->ehdr.e_ident[EI_MAG3] = ELFMAG3; > + img->ehdr.e_ident[EI_CLASS] = ELF_CLASS; > + img->ehdr.e_ident[EI_DATA] = ELF_DATA; > + img->ehdr.e_ident[EI_VERSION] = EV_CURRENT; > + img->ehdr.e_type = ET_EXEC; > + img->ehdr.e_machine = ELF_HOST_MACHINE; > + img->ehdr.e_version = EV_CURRENT; > + img->ehdr.e_phoff = offsetof(struct ElfImage, phdr); > + img->ehdr.e_shoff = offsetof(struct ElfImage, shdr); > + img->ehdr.e_ehsize = sizeof(ElfW(Shdr)); > + img->ehdr.e_phentsize = sizeof(ElfW(Phdr)); > + img->ehdr.e_phnum = 1; > + img->ehdr.e_shentsize = sizeof(img->shdr[0]); > + img->ehdr.e_shnum = ARRAY_SIZE(img->shdr); > + img->ehdr.e_shstrndx = ARRAY_SIZE(img->shdr) - 1; > + > + img->phdr.p_type = PT_LOAD; > + img->phdr.p_offset = (char *)buf - (char *)img; > + img->phdr.p_vaddr = (ElfW(Addr))buf; > + img->phdr.p_paddr = img->phdr.p_vaddr; > + img->phdr.p_filesz = buf_size; > + img->phdr.p_memsz = buf_size; > + img->phdr.p_flags = PF_X; > + > + memcpy(img->str, strings, sizeof(img->str)); > + > + img->shdr[0].sh_type = SHT_NULL; > + > + img->shdr[1].sh_name = find_string(img->str, ".text"); > + img->shdr[1].sh_type = SHT_PROGBITS; > + img->shdr[1].sh_flags = SHF_EXECINSTR | SHF_ALLOC; > + img->shdr[1].sh_addr = (ElfW(Addr))buf + grab; > + img->shdr[1].sh_offset = grab; > + img->shdr[1].sh_size = buf_size - grab; > + > + img->shdr[2].sh_name = find_string(img->str, ".debug_frame"); > + img->shdr[2].sh_type = SHT_PROGBITS; > + img->shdr[2].sh_offset = sizeof(*img); > + img->shdr[2].sh_size = debug_frame_size; > + memcpy(img + 1, debug_frame, debug_frame_size); > + > + img->shdr[3].sh_name = find_string(img->str, ".symtab"); > + img->shdr[3].sh_type = SHT_SYMTAB; > + img->shdr[3].sh_offset = offsetof(struct 
ElfImage, sym); > + img->shdr[3].sh_size = sizeof(img->sym); > + img->shdr[3].sh_info = ARRAY_SIZE(img->sym); > + img->shdr[3].sh_link = img->ehdr.e_shstrndx; > + img->shdr[3].sh_entsize = sizeof(ElfW(Sym)); > + > + img->shdr[4].sh_name = find_string(img->str, ".strtab"); > + img->shdr[4].sh_type = SHT_STRTAB; > + img->shdr[4].sh_offset = offsetof(struct ElfImage, str); > + img->shdr[4].sh_size = sizeof(img->str); > + > + img->sym[0].st_name = find_string(img->str, "code_gen_buffer"); > + img->sym[0].st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC); > + img->sym[0].st_shndx = 1; > + img->sym[0].st_value = (ElfW(Addr))buf + grab; > + img->sym[0].st_size = buf_size - grab; > + > +#if 0 #ifdef DEBUG_ELF_GENERATION or so? > + /* Enable this block to be able to debug the ELF image file creation. > + One can use readelf, objdump, or other inspection utilities. */ > + { > + FILE *f = fopen("/tmp/qemu.jit", "w+b"); > + if (f) { > + if (fwrite(buf, buf_size, 1, f) != buf_size) { > + /* Avoid stupid unused return value warning for fwrite. */ > + } > + fclose(f); > + } > + } > +#endif > + > + one_entry.symfile_addr = buf; > + one_entry.symfile_size = buf_size; > + > + __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; > + __jit_debug_descriptor.relevant_entry = &one_entry; > + __jit_debug_descriptor.first_entry = &one_entry; > + __jit_debug_register_code(); > + > + return grab; > +} > +#else > +/* No support for the feature. Provide the entry point expected by exec.c. */ > +size_t tcg_register_jit(void *buf, size_t buf_size) > +{ > + return 0; > +} > +#endif /* ELF_HOST_MACHINE */ > diff --git a/tcg/tcg.h b/tcg/tcg.h > index dc5e9c9..3d2e5d0 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -574,3 +574,5 @@ extern uint8_t code_gen_prologue[]; > #define tcg_qemu_tb_exec(env, tb_ptr) \ > ((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr) > #endif > + > +size_t tcg_register_jit(void *, size_t); Jan