From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from [140.186.70.92] (port=52877 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PCvu4-0003k7-4F for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:02:41 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PCvtW-0000fL-B5 for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:02:02 -0400 Received: from cantor2.suse.de ([195.135.220.15]:39331 helo=mx2.suse.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PCvtV-0000e0-MM for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:01:58 -0400 From: Alexander Graf Date: Mon, 1 Nov 2010 16:01:31 +0100 Message-Id: <1288623713-28062-19-git-send-email-agraf@suse.de> In-Reply-To: <1288623713-28062-1-git-send-email-agraf@suse.de> References: <1288623713-28062-1-git-send-email-agraf@suse.de> Subject: [Qemu-devel] [PATCH 18/40] xenner: kernel: Main List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel Developers Cc: Gerd Hoffmann This patch adds the platform agnostic piece of xenner's main loop. Signed-off-by: Alexander Graf --- pc-bios/xenner/xenner-main.c | 875 ++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 875 insertions(+), 0 deletions(-) create mode 100644 pc-bios/xenner/xenner-main.c diff --git a/pc-bios/xenner/xenner-main.c b/pc-bios/xenner/xenner-main.c new file mode 100644 index 0000000..c63f447 --- /dev/null +++ b/pc-bios/xenner/xenner-main.c @@ -0,0 +1,875 @@ +/* + * Copyright (C) Red Hat 2007 + * Copyright (C) Novell Inc. 2010 + * + * Author(s): Gerd Hoffmann + * Alexander Graf + * + * Xenner generic main functions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include "config-host.h" + +static void set_up_context(void *ctxt, unsigned long boot_cr3, + unsigned long init_pt_len); +static void guest_hypercall_page(struct xen_cpu *cpu); + +void *memset(void *s, int c, size_t n) +{ + uint8_t *p = s; + uint32_t i; + + for (i = 0; i < n; i++) { + p[i] = c; + } + return s; +} + +void *memcpy(void *dest, const void *src, size_t n) +{ + const uint8_t *s = src; + uint8_t *d = dest; + uint32_t i; + + for (i = 0; i < n; i++) { + d[i] = s[i]; + } + return dest; +} + +int memcmp(const void *s1, const void *s2, size_t n) +{ + const uint8_t *a = s1; + const uint8_t *b = s2; + int i; + + for (i = 0; i < n; i++) { + if (a[i] == b[i]) { + continue; + } + if (a[i] < b[i]) { + return -1; + } + return 1; + } + return 0; +} + +/* --------------------------------------------------------------------- */ + +static void print_gpf_info(int level, struct xen_cpu *cpu, struct regs *regs) +{ + uint8_t *code = (void*)regs->rip; + + printk(level, "%s: vcpu %d, index 0x%x%s%s%s, " + "rflags %" PRIxREG ", cs:rip %" PRIxREG ":%" PRIxREG " " + "-> 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + __FUNCTION__, cpu->id, (int)(regs->error >> 3), + (regs->error & 0x04) ? ", TI" : "", + (regs->error & 0x02) ? ", IDT" : "", + (regs->error & 0x01) ? ", EXT" : "", + regs->rflags, regs->cs, regs->rip, + code[0], code[1], code[2], code[3], + code[4], code[5], code[6], code[7]); +} + +static void print_page_fault_info(int level, struct xen_cpu *cpu, struct regs *regs, ureg_t cr2) +{ + printk(level, "%s:%s%s%s%s%s%s, rip %" PRIxREG ", cr2 %" PRIxREG ", vcpu %d\n", + __FUNCTION__, +#ifdef CONFIG_64BIT + is_kernel(cpu) ? " [kernel-mode]" : " [user-mode]", +#else + "", +#endif + regs->error & 0x01 ? " preset" : " nopage", + regs->error & 0x02 ? " write" : " read", + regs->error & 0x04 ? " user" : " kernel", + regs->error & 0x08 ? " reserved-bit" : "", + regs->error & 0x10 ? " instr-fetch" : "", + regs->rip, cr2, cpu->id); +} + +static int fixup_extable(struct regs *regs) +{ + uintptr_t *ptr; + + for (ptr = _estart; ptr < _estop; ptr += 2) { + if (ptr[0] != regs->rip) { + continue; + } + printk(2, "fixup: %" PRIxPTR " -> %" PRIxPTR "\n", ptr[0], ptr[1]); + regs->rip = ptr[1]; + vminfo.faults[XEN_FAULT_PAGE_FAULT_FIX_EXTAB]++; + return 1; + } + return 0; +} + +int panic(const char *message, struct regs *regs) +{ + printk(0, "panic: %s\n", message); + if (regs) { + print_state(regs); + } + emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, -1); + return 0; +} + +/* --------------------------------------------------------------------- */ + +#ifdef CONFIG_64BIT +# define DR "%016" PRIxREG +# define DC "%08" PRIxREG +# define DS "%04" PRIxREG +#else +# define DR "%08" PRIxREG +# define DC "%08" PRIxREG +# define DS "%04" PRIxREG +#endif + +void print_registers(int level, struct regs *regs) +{ + ureg_t ds,es,fs,gs,cr0,cr2,cr3,cr4; + + asm volatile("mov %%ds, %[ds] \n" + "mov %%es, %[es] \n" + "mov %%fs, %[fs] \n" + "mov %%gs, %[gs] \n" + : [ds] "=r" (ds), + [es] "=r" (es), + [fs] "=r" (fs), + [gs] "=r" (gs) + : /* no inputs */); + asm volatile("mov %%cr0, %[cr0] \n" + "mov %%cr2, %[cr2] \n" + "mov %%cr3, %[cr3] \n" + "mov %%cr4, %[cr4] \n" + : [cr0] "=r" (cr0), + [cr2] "=r" (cr2), + [cr3] "=r" (cr3), + [cr4] "=r" (cr4) + : /* no inputs */); + + printk(level, "printing registers\n"); + printk(level, " code cs:rip " DS ":" DR "\n", regs->cs, regs->rip); + printk(level, " stack ss:rsp " DS ":" DR "\n", regs->ss, regs->rsp); + printk(level, " rax " DR " rbx " DR " rcx " DR " rdx " DR "\n", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + printk(level, " rsi " DR " rdi " DR " rsp " DR " rbp " DR "\n", + regs->rsi, regs->rdi, regs->rsp, regs->rbp); +#ifdef CONFIG_64BIT + printk(level, " r8 " DR " r9 " DR " r10 " DR " r11 " DR "\n", + regs->r8, regs->r9, regs->r10, regs->r11); + printk(level, " r12 " DR " r13 " DR " r14 " DR " r15 " DR "\n", + regs->r12, regs->r13, regs->r14, regs->r15); +#endif + printk(level, " cs " DS " ds " DS " es " DS " fs " DS " gs " DS " ss " DS "\n", + regs->cs, ds, es, fs, gs, regs->ss); + printk(level, " cr0 " DC " cr2 " DC " cr3 " DC " cr4 " DC " rflags " DC "\n", + cr0, cr2, cr3, cr4, regs->rflags); + print_bits(level, " cr0", cr0, cr0, cr0_bits); + print_bits(level, " cr4", cr4, cr4, cr4_bits); + print_bits(level, " rflags", regs->rflags, regs->rflags, rflags_bits); + +} + +void print_stack(int level, ureg_t rsp) +{ + ureg_t max; + + max = ((rsp + PAGE_SIZE) & PAGE_MASK) - sizeof(ureg_t); + printk(level, "printing stack " DR " - " DR "\n", rsp, max); + while (rsp <= max) { + printk(level, " " DR ": " DR "\n", rsp, *((ureg_t*)rsp)); + rsp += sizeof(ureg_t); + } +} + +void print_state(struct regs *regs) +{ + print_registers(0, regs); + print_stack(0, regs->rsp); +} + +#undef DR + +/* --------------------------------------------------------------------- */ + +static struct descriptor_32 mkdesc(const struct kvm_segment *seg) +{ + struct descriptor_32 desc; + int shift = 0; + + shift = seg->g ? 12 : 0; + desc.a = (seg->limit >> shift) & 0xffff; + desc.b = (seg->limit >> shift) & 0x000f0000; + + desc.a |= (seg->base & 0xffff) << 16; + desc.b |= seg->base & 0xff000000; + desc.b |= (seg->base & 0xff0000) >> 16; + desc.b |= (seg->type & 0x0f) << 8; + desc.b |= (seg->dpl & 0x03) << 13; + + if (seg->s) desc.b |= (1 << 12); + if (seg->present) desc.b |= (1 << 15); + if (seg->avl) desc.b |= (1 << 20); + if (seg->l) desc.b |= (1 << 21); + if (seg->db) desc.b |= (1 << 22); + if (seg->g) desc.b |= (1 << 23); + + return desc; +} + +static inline void gdt_set(struct descriptor_32 *gdt, const struct kvm_segment *seg) +{ + gdt[ seg->selector >> 3 ] = mkdesc(seg); +} + +static void cr_init(struct xen_cpu *cpu) +{ + ureg_t cr0, cr4; + + printk(2, "%s: cpu %d\n", __FUNCTION__, cpu->id); + + cr0 = read_cr0(); + cr0 |= X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | X86_CR0_NE | \ + X86_CR0_WP | X86_CR0_AM | X86_CR0_PG; + cr0 &= ~(X86_CR0_TS|X86_CR0_CD|X86_CR0_NW); + print_bits(2, "cr0", read_cr0(), cr0, cr0_bits); + write_cr0(cr0); + + cr4 = read_cr4(); + cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; + print_bits(2, "cr4", read_cr4(), cr4, cr4_bits); + write_cr4(cr4); +} + +static void stack_init(struct xen_cpu *cpu) +{ + uintptr_t *ptr; + int pages; + + if (cpu->stack_low) { + return; + } + + /* allocate stack */ + pages = (boot_stack_high - boot_stack_low + PAGE_SIZE -1) / PAGE_SIZE; + cpu->stack_low = get_pages(pages, "stack"); + cpu->stack_high = cpu->stack_low + pages * PAGE_SIZE; + + /* set per-cpu data pointer */ + ptr = STACK_PTR(cpu, cpu_ptr); + *ptr = (uintptr_t)cpu; + + /* set per-cpu data pointer for boot stack */ + if (!cpu->id) { + ptr = (void*)(&cpu_ptr); + *ptr = (uintptr_t)cpu; + } + +#ifdef CONFIG_64BIT + /* copy and setup syscall trampoline from boot stack */ + memcpy(STACK_PTR(cpu, trampoline_start), + trampoline_start, trampoline_stop - trampoline_start); + ptr = STACK_PTR(cpu, trampoline_patch); + *ptr = (uintptr_t)trampoline_syscall; + + /* allocate irq stack */ + cpu->irqstack_low = get_pages(pages, "irqstack"); + cpu->irqstack_high = cpu->irqstack_low + PAGE_SIZE; + + /* set per-cpu data pointer */ + ptr = IRQSTACK_PTR(cpu, cpu_ptr); + *ptr = (uintptr_t)cpu; +#endif +} + +void gdt_load(struct xen_cpu *cpu) +{ + struct { + uint16_t len; + uintptr_t ptr; + } __attribute__((packed)) gdtp = { + .len = (16 * PAGE_SIZE)-1, + .ptr = (uintptr_t)cpu->gdt, + }; + + asm volatile("lgdt %0" : : "m" (gdtp) : "memory"); +} + +void idt_load(void) +{ + struct { + uint16_t len; + uintptr_t ptr; + } __attribute__((packed)) idtp = { + .len = sizeof(xen_idt)-1, + .ptr = (uintptr_t)xen_idt, + }; + + asm volatile("lidt %0" : : "m" (idtp) : "memory"); +} + +void guest_cpu_init(struct xen_cpu *cpu) +{ + struct vcpu_guest_context *ctxt = cpu->init_ctxt; + ureg_t mfns[16]; + int i; + + if (ctxt->gdt_ents) { + for (i = 0; i < 16; i++) { + mfns[i] = ctxt->gdt_frames[i]; + } + guest_gdt_init(cpu, ctxt->gdt_ents, mfns); + } + + ctxt->kernel_ss = fix_sel(ctxt->kernel_ss); + ctxt->user_regs.cs = fix_sel(ctxt->user_regs.cs); + ctxt->user_regs.ds = fix_sel(ctxt->user_regs.ds); + ctxt->user_regs.es = fix_sel(ctxt->user_regs.es); + ctxt->user_regs.fs = fix_sel(ctxt->user_regs.fs); + ctxt->user_regs.gs = fix_sel(ctxt->user_regs.gs); + ctxt->user_regs.ss = fix_sel(ctxt->user_regs.ss); + + cpu->kernel_ss = ctxt->kernel_ss; + cpu->kernel_sp = ctxt->kernel_sp; +} + +static uint64_t maddr_to_paddr(uint64_t _maddr) +{ + unsigned long virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0); + uint64_t maddr = _maddr; + uint64_t mfn = addr_to_frame(maddr); + + /* M2P */ + if ((mfn >= vmconf.mfn_m2p) && (mfn < (vmconf.mfn_m2p + vmconf.pg_m2p))) { + return XEN_M2P + maddr - frame_to_addr(vmconf.mfn_m2p); + } + + /* xenner */ + if (maddr < frame_to_addr(vmconf.mfn_guest)) { + return (uintptr_t)_vstart + maddr; + } + + /* guest */ + maddr -= frame_to_addr(vmconf.mfn_guest); + maddr += virt_base; + + return maddr; +} + +static void *pfn_to_ptr(xen_pfn_t pfn) +{ + unsigned long addr = frame_to_addr(pfn); + + addr += frame_to_addr(vmconf.mfn_guest); + return map_page(addr); +} + +static void guest_start_info(struct xen_cpu *cpu, struct regs *regs, + unsigned long init_pt_len, unsigned long boot_cr3) +{ + struct start_info *start_info; + uint64_t i; + uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0); + uint64_t initrd_len; + uint64_t cmdline_pfn = emudev_get(EMUDEV_CONF_PFN_CMDLINE, 0); + unsigned long *mfn_list; + uint64_t mfn_list_pfn = emudev_get(EMUDEV_CONF_PFN_MFN_LIST, 0); + char cap_ver[] = CAP_VERSION_STRING; + char *cmdline = NULL; + + start_info = pfn_to_ptr(emudev_get(EMUDEV_CONF_PFN_START_INFO, 0)); + + printk(1, "%s: called\n", __FUNCTION__); + + memset(start_info, 0, sizeof(*start_info)); + memcpy(start_info->magic, cap_ver, sizeof(cap_ver)); + start_info->magic[sizeof(start_info->magic) - 1] = '\0'; + + start_info->shared_info = EMU_PA(&shared_info); + start_info->pt_base = maddr_to_paddr(boot_cr3); + start_info->nr_pt_frames = addr_to_frame(init_pt_len + (PAGE_SIZE - 1)); + start_info->shared_info = (unsigned long)EMU_PA(&shared_info); + start_info->nr_pages = emudev_get(EMUDEV_CONF_GUEST_PAGE_COUNT, 0); + start_info->store_mfn = emudev_get(EMUDEV_CONF_MFN_XENSTORE, 0); + start_info->store_evtchn = emudev_get(EMUDEV_CONF_EVTCH_XENSTORE, 0); + start_info->console.domU.mfn = emudev_get(EMUDEV_CONF_MFN_CONSOLE, 0); + start_info->console.domU.evtchn = emudev_get(EMUDEV_CONF_EVTCH_CONSOLE, 0); + + initrd_len = emudev_get(EMUDEV_CONF_INITRD_LEN, 0); + if (initrd_len) { + start_info->mod_start = virt_base + + frame_to_addr(emudev_get(EMUDEV_CONF_PFN_INITRD, 0)); + start_info->mod_len = initrd_len; + } + + if (cmdline_pfn) { + cmdline = pfn_to_ptr(cmdline_pfn); + + memcpy(start_info->cmd_line, pfn_to_ptr(cmdline_pfn), + MAX_GUEST_CMDLINE); + printk(1, "guest cmdline: %s\n", start_info->cmd_line); + } + + /* set up m2p page table */ + for (i = 0; i < vmconf.pg_total; i++) { + m2p[i + vmconf.mfn_guest] = i; + } + + /* fill mfn list */ + start_info->mfn_list = virt_base + frame_to_addr(mfn_list_pfn); + mfn_list = (void*)start_info->mfn_list; + + for (i = 0; i < start_info->nr_pages; i++) { + mfn_list[i] = i + vmconf.mfn_guest; + } + + regs->rsi = (unsigned long)start_info; + + free_page(start_info); + if (cmdline) { + free_page(cmdline); + } +} + +static void cpu_set_cr3(struct xen_cpu *cpu, unsigned long boot_cr3) +{ +#ifdef CONFIG_64BIT + cpu->user_mode = 0; + cpu->kernel_cr3_mfn = addr_to_frame(boot_cr3); +#else + cpu->cr3_mfn = addr_to_frame(boot_cr3); +#endif +} + +static uint64_t count_pgtables(uint64_t max_pfn) +{ + uint64_t r = max_pfn; + uint64_t fourmb = addr_to_frame(4 * 1024 * 1024); + + /* XXX this should become a real calculation, for now assume we need max + * 200 page table pages */ + r += 200; + + /* pad to 4mb */ + r = (r + fourmb - 1) & ~(fourmb - 1); + + return r; +} + +/* + * Maps the guest into its own virtual address space in its own page table and + * returns the length and maddr of that new page table + */ +static unsigned long map_guest(unsigned long *boot_cr3) +{ + uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0); + struct xen_cpu tmp_cpu; + uint64_t max_pfn = emudev_get(EMUDEV_CONF_PFN_INIT_PT, 0); + unsigned long init_pt_len; + + max_pfn += count_pgtables(max_pfn); + + /* create initial page table that maps the guest virt_base linearly + to host physical memory. This has to happen in guest visible mem */ + switch_heap(HEAP_HIGH); + + *boot_cr3 = (unsigned long)EMU_PA(get_pages(1, "pt root")); + cpu_set_cr3(&tmp_cpu, *boot_cr3); + printk(3, "init guest pt map mfn %lx len %lx\n", (unsigned long)vmconf.mfn_guest, + (unsigned long)max_pfn); + + map_region(&tmp_cpu, virt_base, EMU_PGFLAGS, vmconf.mfn_guest, max_pfn); + + /* save the pt len for start_info */ + init_pt_len = heap_size(); + + switch_heap(HEAP_EMU); + + return init_pt_len; +} + + +/* --------------------------------------------------------------------- */ + +static struct xen_cpu *cpu_alloc(int id) +{ + struct xen_cpu *cpu; + ureg_t cr3; + + printk(1, "%s: cpu %d\n", __FUNCTION__, id); + + cpu = get_memory(sizeof(*cpu), "per-cpu data"); + cpu->id = id; + cpu->periodic = XEN_DEFAULT_PERIOD; + cpu->v.vcpu_info = (void*)&shared_info.vcpu_info[id]; + cpu->v.vcpu_info_pa = EMU_PA(cpu->v.vcpu_info); + guest_cli(cpu); + list_add_tail(&cpu->next, &cpus); + + asm volatile("mov %%cr3,%0" : "=r" (cr3)); + pv_write_cr3(cpu, addr_to_frame(cr3)); + + gdt_init(cpu); + stack_init(cpu); + tss_init(cpu); + return cpu; +} + +struct xen_cpu *cpu_find(int id) +{ + struct list_head *item; + struct xen_cpu *cpu; + + list_for_each(item, &cpus) { + cpu = list_entry(item, struct xen_cpu, next); + if (cpu->id == id) { + return cpu; + } + } + return cpu_alloc(id); +} + +static void cpu_init(struct xen_cpu *cpu) +{ + printk(1, "%s: cpu %d\n", __FUNCTION__, cpu->id); + + gdt_load(cpu); + ltr(tss(cpu) << 3); + idt_load(); + cr_init(cpu); + msrs_init(cpu); + pv_init(cpu); + + vminfo.vcpus_online |= (1 << cpu->id); + vminfo.vcpus_running |= (1 << cpu->id); + vminfo.vcpus++; + cpu->online = 1; +} + +static void userspace_config(void) +{ + uint32_t pfn; + int i; + + /* read config */ + vmconf.debug_level = emudev_get(EMUDEV_CONF_DEBUG_LEVEL, 0); + vmconf.mfn_emu = emudev_get(EMUDEV_CONF_EMU_START_PFN, 0); + vmconf.pg_emu = emudev_get(EMUDEV_CONF_EMU_PAGE_COUNT, 0); + vmconf.mfn_m2p = emudev_get(EMUDEV_CONF_M2P_START_PFN, 0); + vmconf.pg_m2p = emudev_get(EMUDEV_CONF_M2P_PAGE_COUNT, 0); + vmconf.mfn_guest = emudev_get(EMUDEV_CONF_GUEST_START_PFN, 0); + vmconf.pg_guest = emudev_get(EMUDEV_CONF_GUEST_PAGE_COUNT, 0); + vmconf.pg_total = emudev_get(EMUDEV_CONF_TOTAL_PAGE_COUNT, 0); + vmconf.nr_cpus = emudev_get(EMUDEV_CONF_NR_VCPUS, 0); + + /* write config */ + pfn = addr_to_frame(EMU_PA(&boot_ctxt)); + emudev_set(EMUDEV_CONF_BOOT_CTXT_PFN, 0, pfn); + pfn = addr_to_frame(EMU_PA(&vminfo)); + emudev_set(EMUDEV_CONF_VMINFO_PFN, 0, pfn); + pfn = addr_to_frame(EMU_PA(&grant_table)); + for (i = 0; i < GRANT_FRAMES_MAX; i++) + emudev_set(EMUDEV_CONF_GRANT_TABLE_PFNS, i, pfn+i); + + /* commands */ + emudev_cmd(EMUDEV_CMD_CONFIGURATION_DONE, 0); +} + +/* --------------------------------------------------------------------- */ +/* called from assembler */ + +asmlinkage void do_boot(struct regs *regs) +{ + struct xen_cpu *cpu; + struct xen_cpu boot_cpu; + unsigned long init_pt_len, boot_cr3; + + printk(0, "this is %s (qemu-xenner %s), boot cpu #0\n", EMUNAME, + QEMU_VERSION QEMU_PKGVERSION); + + userspace_config(); + printk(1, "%s: configuration done\n", EMUNAME); + + cpu_set_cr3(&boot_cpu, EMU_PA(emu_pgd)); + paging_init(&boot_cpu); + init_pt_len = map_guest(&boot_cr3); + + set_up_context(&boot_ctxt, boot_cr3, init_pt_len); + + cpu = cpu_alloc(0); + cpu->init_ctxt = &boot_ctxt; + idt_init(); + cpu_init(cpu); + printk(1, "%s: boot cpu setup done\n", EMUNAME); + +#ifdef CONFIG_64BIT + paging_init(cpu); +#endif + paging_start(cpu); + printk(1, "%s: paging setup done\n", EMUNAME); + + irq_init(cpu); + printk(1, "%s: irq setup done\n", EMUNAME); + + guest_cpu_init(cpu); + guest_regs_init(cpu, regs); + guest_start_info(cpu, regs, init_pt_len, boot_cr3); + guest_hypercall_page(cpu); + printk(1, "%s: booting guest kernel (entry %" PRIxREG ":%" PRIxREG ") ...\n", + EMUNAME, regs->cs, regs->rip); +} + +asmlinkage void do_boot_secondary(ureg_t id, struct regs *regs) +{ + struct xen_cpu *cpu; + + printk(0, "this is cpu #%d\n", (int)id); + cpu = cpu_find(id); + cpu_init(cpu); + paging_start(cpu); + irq_init(cpu); +#if 0 + if (cpu->virq_to_vector[VIRQ_TIMER]) + lapic_timer(cpu); +#endif + + guest_cpu_init(cpu); + guest_regs_init(cpu, regs); + + print_registers(2, regs); + printk(1, "%s: secondary entry: %" PRIxREG ":%" PRIxREG ", jumping ...\n", + EMUNAME, regs->cs, regs->rip); +} + +asmlinkage void do_illegal_instruction(struct regs *regs) +{ + struct xen_cpu *cpu = get_cpu(); + int skip; + + vminfo.faults[XEN_FAULT_ILLEGAL_INSTRUCTION]++; + if (context_is_emu(regs)) { + panic("ring0 (emu) illegal instruction", regs); + } + if (context_is_user(cpu, regs)) { + uint8_t *i = (void*)regs->rip; + printk(1, "user ill: at %p" + " 0x%02x, 0x%02x, 0x%02x, 0x%02x," + " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + i, i[0], i[1], i[2], i[3], i[4], i[5], i[6], i[7]); + bounce_trap(cpu, regs, 6, -1); + return; + } + + skip = emulate(cpu, regs); + switch (skip) { + case -1: /* error */ + panic("instruction emulation failed (ill)\n", regs); + break; + case 0: /* bounce to guest */ + bounce_trap(cpu, regs, 6, -1); + break; + default: /* handled */ + regs->rip += skip; + break; + } +} + +static int is_allowed_io(struct xen_cpu *cpu, struct regs *regs) +{ + uint8_t *code = (void*)regs->rip; + int pl; + +#ifdef CONFIG_64BIT + pl = context_is_user(cpu, regs) ? 3 : 1; +#else + pl = regs->cs & 0x03; +#endif + + switch (*code) { + case 0xe4 ... 0xe7: + case 0xec ... 0xef: + /* I/O instructions */ + if (pl <= cpu->iopl) + return 1; /* yes: by iopl */ + if (cpu->nr_ports) + return 1; /* yes: by bitmap (FIXME: check port) */ + break; + case 0xfa: + case 0xfb: + /* cli, sti */ + if (pl <= cpu->iopl) + return 1; /* yes: by iopl */ + } + return 0; /* no */ +} + +asmlinkage void do_general_protection(struct regs *regs) +{ + struct xen_cpu *cpu = get_cpu(); + int skip; + + vminfo.faults[XEN_FAULT_GENERAL_PROTECTION]++; + if (context_is_emu(regs)) { + if (fixup_extable(regs)) { + return; + } + print_gpf_info(0, cpu, regs); + panic("ring0 (emu) general protection fault", regs); + } + if (is_allowed_io(cpu, regs)) { + goto emulate; + } + if (context_is_user(cpu, regs)) { + vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_GUEST]++; + print_gpf_info(1, cpu, regs); + bounce_trap(cpu, regs, 13, -1); + return; + } + + if (regs->error) { + print_gpf_info(0, cpu, regs); + panic("unhandled kernel gpf", regs); + } + +emulate: + skip = emulate(cpu, regs); + switch (skip) { + case -1: /* error */ + print_gpf_info(0, cpu, regs); + panic("instruction emulation failed (gpf)", regs); + break; + case 0: /* bounce to guest */ + vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_GUEST]++; + bounce_trap(cpu, regs, 13, -1); + break; + default: /* handled */ + vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_EMUINS]++; + regs->rip += skip; + evtchn_try_forward(cpu, regs); /* sti */ + break; + } +} + +asmlinkage void do_double_fault(struct regs *regs) +{ + panic("double fault", regs); +} + +asmlinkage void do_guest_forward(struct regs *regs) +{ + struct xen_cpu *cpu = get_cpu(); + const struct trapinfo *trap = NULL; + + if (regs->trapno < sizeof(trapinfo)/sizeof(trapinfo[0])) { + trap = trapinfo + regs->trapno; + } + printk(trap ? trap->lvl : 0, + "%s: trap %d [%s], error 0x%" PRIxREG "," + " cs:rip %" PRIxREG ":%" PRIxREG "," + " forwarding to guest\n", + __FUNCTION__, (int)regs->trapno, + trap && trap->name ? trap->name : "-", + trap && trap->ec ? regs->error : 0, + regs->cs, regs->rip); + bounce_trap(cpu, regs, regs->trapno, -1); +} + +asmlinkage void do_lazy_fpu(struct regs *regs) +{ + struct xen_cpu *cpu = get_cpu(); + + vminfo.faults[XEN_FAULT_LAZY_FPU]++; + clts(); + bounce_trap(cpu, regs, regs->trapno, -1); +} + +asmlinkage void do_int1(struct regs *regs) +{ + if (context_is_emu(regs)) { + printk(0, "%s: emu context\n", __FUNCTION__); + print_registers(0, regs); + return; + } + do_guest_forward(regs); +} + +asmlinkage void do_int3(struct regs *regs) +{ + if (context_is_emu(regs)) { + printk(0, "%s: emu context\n", __FUNCTION__); + print_registers(0, regs); + return; + } + do_guest_forward(regs); +} + +/* --------------------------------------------------------------------- */ + +static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; +static atomic_t flush_cnt; +static ureg_t flush_addr; + +asmlinkage void do_smp_flush_tlb(struct regs *regs) +{ + struct xen_cpu *cpu = get_cpu(); + + lapic_eoi(cpu); + if (flush_addr) { + flush_tlb_addr(flush_addr); + } else { + flush_tlb(); + } + atomic_dec(&flush_cnt); +} + +void flush_tlb_remote(struct xen_cpu *cpu, ureg_t mask, ureg_t addr) +{ + int cpus; + + mask &= ~(1 << cpu->id); + if (!mask) { + vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_NONE]++; + return; + } + + /* + * we must be able to process ipi while waiting for the lock, + * otherwise we deadlock in case another cpu busy-waits for us + * doing the tlb flush. + */ + sti(); + spin_lock(&flush_lock); + + cpus = vminfo.vcpus-1; /* FIXME: not using mask, sending to all */ + flush_addr = addr; + if (flush_addr) { + vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_PAGE]++; + } else { + vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_ALL]++; + } + + atomic_add(cpus, &flush_cnt); + lapic_ipi_flush_tlb(cpu); + while (atomic_read(&flush_cnt)) { + pause(); + } + + spin_unlock(&flush_lock); + cli(); +} -- 1.6.0.2