Philippe Mathieu-Daudé writes: > On 10/2/19 1:30 PM, Sergio Lopez wrote: >> Move x86 functions that will be shared between PC and non-PC machine >> types to x86.c, along with their helpers. >> >> Signed-off-by: Sergio Lopez >> --- >> hw/i386/Makefile.objs | 1 + >> hw/i386/pc.c | 582 +---------------------------------- >> hw/i386/pc_piix.c | 1 + >> hw/i386/pc_q35.c | 1 + >> hw/i386/pc_sysfw.c | 54 +--- >> hw/i386/x86.c | 684 ++++++++++++++++++++++++++++++++++++++++++ >> include/hw/i386/pc.h | 1 - >> include/hw/i386/x86.h | 35 +++ >> 8 files changed, 724 insertions(+), 635 deletions(-) >> create mode 100644 hw/i386/x86.c >> create mode 100644 include/hw/i386/x86.h > > I recommend you set up the scripts/git.orderfile file to make review easier :) Thanks, I wasn't aware of that feature. >> diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs >> index d3374e0831..7ed80a4853 100644 >> --- a/hw/i386/Makefile.objs >> +++ b/hw/i386/Makefile.objs >> @@ -1,5 +1,6 @@ >> obj-$(CONFIG_KVM) += kvm/ >> obj-y += e820_memory_layout.o multiboot.o >> +obj-y += x86.o >> obj-y += pc.o >> obj-$(CONFIG_I440FX) += pc_piix.o >> obj-$(CONFIG_Q35) += pc_q35.o >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index 029bc23e7c..b9ca831164 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -25,6 +25,7 @@ >> #include "qemu/osdep.h" >> #include "qemu/units.h" >> #include "hw/i386/pc.h" >> +#include "hw/i386/x86.h" > > Nit: Include before "pc.h" :) OK, I'll check the other occurrences too. > Reviewed-by: Philippe Mathieu-Daudé > Tested-by: Philippe Mathieu-Daudé Thanks, Sergio. 
>> #include "hw/char/serial.h" >> #include "hw/char/parallel.h" >> #include "hw/i386/apic.h" >> @@ -102,9 +103,6 @@ >> struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; >> -/* Physical Address of PVH entry point read from kernel ELF NOTE >> */ >> -static size_t pvh_start_addr; >> - >> GlobalProperty pc_compat_4_1[] = {}; >> const size_t pc_compat_4_1_len = G_N_ELEMENTS(pc_compat_4_1); >> @@ -866,478 +864,6 @@ static void handle_a20_line_change(void >> *opaque, int irq, int level) >> x86_cpu_set_a20(cpu, level); >> } >> -/* Calculates initial APIC ID for a specific CPU index >> - * >> - * Currently we need to be able to calculate the APIC ID from the CPU index >> - * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have >> - * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of >> - * all CPUs up to max_cpus. >> - */ >> -static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, >> - unsigned int cpu_index) >> -{ >> - MachineState *ms = MACHINE(pcms); >> - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> - uint32_t correct_id; >> - static bool warned; >> - >> - correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, >> - ms->smp.threads, cpu_index); >> - if (pcmc->compat_apic_id_mode) { >> - if (cpu_index != correct_id && !warned && !qtest_enabled()) { >> - error_report("APIC IDs set in compatibility mode, " >> - "CPU topology won't match the configuration"); >> - warned = true; >> - } >> - return cpu_index; >> - } else { >> - return correct_id; >> - } >> -} >> - >> -static long get_file_size(FILE *f) >> -{ >> - long where, size; >> - >> - /* XXX: on Unix systems, using fstat() probably makes more sense */ >> - >> - where = ftell(f); >> - fseek(f, 0, SEEK_END); >> - size = ftell(f); >> - fseek(f, where, SEEK_SET); >> - >> - return size; >> -} >> - >> -struct setup_data { >> - uint64_t next; >> - uint32_t type; >> - uint32_t len; >> - uint8_t data[0]; >> -} __attribute__((packed)); >> - >> - 
>> -/* >> - * The entry point into the kernel for PVH boot is different from >> - * the native entry point. The PVH entry is defined by the x86/HVM >> - * direct boot ABI and is available in an ELFNOTE in the kernel binary. >> - * >> - * This function is passed to load_elf() when it is called from >> - * load_elfboot() which then additionally checks for an ELF Note of >> - * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to >> - * parse the PVH entry address from the ELF Note. >> - * >> - * Due to trickery in elf_opts.h, load_elf() is actually available as >> - * load_elf32() or load_elf64() and this routine needs to be able >> - * to deal with being called as 32 or 64 bit. >> - * >> - * The address of the PVH entry point is saved to the 'pvh_start_addr' >> - * global variable. (although the entry point is 32-bit, the kernel >> - * binary can be either 32-bit or 64-bit). >> - */ >> -static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) >> -{ >> - size_t *elf_note_data_addr; >> - >> - /* Check if ELF Note header passed in is valid */ >> - if (arg1 == NULL) { >> - return 0; >> - } >> - >> - if (is64) { >> - struct elf64_note *nhdr64 = (struct elf64_note *)arg1; >> - uint64_t nhdr_size64 = sizeof(struct elf64_note); >> - uint64_t phdr_align = *(uint64_t *)arg2; >> - uint64_t nhdr_namesz = nhdr64->n_namesz; >> - >> - elf_note_data_addr = >> - ((void *)nhdr64) + nhdr_size64 + >> - QEMU_ALIGN_UP(nhdr_namesz, phdr_align); >> - } else { >> - struct elf32_note *nhdr32 = (struct elf32_note *)arg1; >> - uint32_t nhdr_size32 = sizeof(struct elf32_note); >> - uint32_t phdr_align = *(uint32_t *)arg2; >> - uint32_t nhdr_namesz = nhdr32->n_namesz; >> - >> - elf_note_data_addr = >> - ((void *)nhdr32) + nhdr_size32 + >> - QEMU_ALIGN_UP(nhdr_namesz, phdr_align); >> - } >> - >> - pvh_start_addr = *elf_note_data_addr; >> - >> - return pvh_start_addr; >> -} >> - >> -static bool load_elfboot(const char *kernel_filename, >> - int kernel_file_size, >> - 
uint8_t *header, >> - size_t pvh_xen_start_addr, >> - FWCfgState *fw_cfg) >> -{ >> - uint32_t flags = 0; >> - uint32_t mh_load_addr = 0; >> - uint32_t elf_kernel_size = 0; >> - uint64_t elf_entry; >> - uint64_t elf_low, elf_high; >> - int kernel_size; >> - >> - if (ldl_p(header) != 0x464c457f) { >> - return false; /* no elfboot */ >> - } >> - >> - bool elf_is64 = header[EI_CLASS] == ELFCLASS64; >> - flags = elf_is64 ? >> - ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; >> - >> - if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ >> - error_report("elfboot unsupported flags = %x", flags); >> - exit(1); >> - } >> - >> - uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; >> - kernel_size = load_elf(kernel_filename, read_pvh_start_addr, >> - NULL, &elf_note_type, &elf_entry, >> - &elf_low, &elf_high, 0, I386_ELF_MACHINE, >> - 0, 0); >> - >> - if (kernel_size < 0) { >> - error_report("Error while loading elf kernel"); >> - exit(1); >> - } >> - mh_load_addr = elf_low; >> - elf_kernel_size = elf_high - elf_low; >> - >> - if (pvh_start_addr == 0) { >> - error_report("Error loading uncompressed kernel without PVH ELF Note"); >> - exit(1); >> - } >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); >> - >> - return true; >> -} >> - >> -static void x86_load_linux(PCMachineState *pcms, >> - FWCfgState *fw_cfg) >> -{ >> - uint16_t protocol; >> - int setup_size, kernel_size, cmdline_size; >> - int dtb_size, setup_data_offset; >> - uint32_t initrd_max; >> - uint8_t header[8192], *setup, *kernel; >> - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; >> - FILE *f; >> - char *vmode; >> - MachineState *machine = MACHINE(pcms); >> - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> - struct setup_data *setup_data; >> - const char *kernel_filename = machine->kernel_filename; >> - const char 
*initrd_filename = machine->initrd_filename; >> - const char *dtb_filename = machine->dtb; >> - const char *kernel_cmdline = machine->kernel_cmdline; >> - >> - /* Align to 16 bytes as a paranoia measure */ >> - cmdline_size = (strlen(kernel_cmdline)+16) & ~15; >> - >> - /* load the kernel header */ >> - f = fopen(kernel_filename, "rb"); >> - if (!f || !(kernel_size = get_file_size(f)) || >> - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != >> - MIN(ARRAY_SIZE(header), kernel_size)) { >> - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", >> - kernel_filename, strerror(errno)); >> - exit(1); >> - } >> - >> - /* kernel protocol version */ >> -#if 0 >> - fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); >> -#endif >> - if (ldl_p(header+0x202) == 0x53726448) { >> - protocol = lduw_p(header+0x206); >> - } else { >> - /* >> - * This could be a multiboot kernel. If it is, let's stop treating it >> - * like a Linux kernel. >> - * Note: some multiboot images could be in the ELF format (the same of >> - * PVH), so we try multiboot first since we check the multiboot magic >> - * header before to load it. >> - */ >> - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, >> - kernel_cmdline, kernel_size, header)) { >> - return; >> - } >> - /* >> - * Check if the file is an uncompressed kernel file (ELF) and load it, >> - * saving the PVH entry point used by the x86/HVM direct boot ABI. >> - * If load_elfboot() is successful, populate the fw_cfg info. 
>> - */ >> - if (pcmc->pvh_enabled && >> - load_elfboot(kernel_filename, kernel_size, >> - header, pvh_start_addr, fw_cfg)) { >> - fclose(f); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, >> - strlen(kernel_cmdline) + 1); >> - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, >> - header, sizeof(header)); >> - >> - /* load initrd */ >> - if (initrd_filename) { >> - GMappedFile *mapped_file; >> - gsize initrd_size; >> - gchar *initrd_data; >> - GError *gerr = NULL; >> - >> - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> - if (!mapped_file) { >> - fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> - initrd_filename, gerr->message); >> - exit(1); >> - } >> - pcms->initrd_mapped_file = mapped_file; >> - >> - initrd_data = g_mapped_file_get_contents(mapped_file); >> - initrd_size = g_mapped_file_get_length(mapped_file); >> - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; >> - if (initrd_size >= initrd_max) { >> - fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> - "(max: %"PRIu32", need %"PRId64")\n", >> - initrd_max, (uint64_t)initrd_size); >> - exit(1); >> - } >> - >> - initrd_addr = (initrd_max - initrd_size) & ~4095; >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, >> - initrd_size); >> - } >> - >> - option_rom[nb_option_roms].bootindex = 0; >> - option_rom[nb_option_roms].name = "pvh.bin"; >> - nb_option_roms++; >> - >> - return; >> - } >> - protocol = 0; >> - } >> - >> - if (protocol < 0x200 || !(header[0x211] & 0x01)) { >> - /* Low kernel */ >> - real_addr = 0x90000; >> - cmdline_addr = 0x9a000 - cmdline_size; >> - prot_addr = 0x10000; >> - } else if (protocol < 0x202) { >> - /* High but ancient kernel */ >> - real_addr = 0x90000; >> - cmdline_addr = 0x9a000 - cmdline_size; >> - prot_addr = 0x100000; >> - } else { >> - /* High and recent kernel */ >> - real_addr = 0x10000; >> - cmdline_addr = 0x20000; >> - prot_addr = 0x100000; >> - } >> - >> -#if 0 >> - fprintf(stderr, >> - "qemu: real_addr = 0x" TARGET_FMT_plx "\n" >> - "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" >> - "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", >> - real_addr, >> - cmdline_addr, >> - prot_addr); >> -#endif >> - >> - /* highest address for loading the initrd */ >> - if (protocol >= 0x20c && >> - lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { >> - /* >> - * Linux has supported initrd up to 4 GB for a very long time (2007, >> - * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), >> - * though it only sets initrd_max to 2 GB to "work around bootloader >> - * bugs". Luckily, QEMU firmware(which does something like bootloader) >> - * has supported this. >> - * >> - * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can >> - * be loaded into any address. 
>> - * >> - * In addition, initrd_max is uint32_t simply because QEMU doesn't >> - * support the 64-bit boot protocol (specifically the ext_ramdisk_image >> - * field). >> - * >> - * Therefore here just limit initrd_max to UINT32_MAX simply as well. >> - */ >> - initrd_max = UINT32_MAX; >> - } else if (protocol >= 0x203) { >> - initrd_max = ldl_p(header+0x22c); >> - } else { >> - initrd_max = 0x37ffffff; >> - } >> - >> - if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) { >> - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; >> - } >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); >> - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> - >> - if (protocol >= 0x202) { >> - stl_p(header+0x228, cmdline_addr); >> - } else { >> - stw_p(header+0x20, 0xA33F); >> - stw_p(header+0x22, cmdline_addr-real_addr); >> - } >> - >> - /* handle vga= parameter */ >> - vmode = strstr(kernel_cmdline, "vga="); >> - if (vmode) { >> - unsigned int video_mode; >> - /* skip "vga=" */ >> - vmode += 4; >> - if (!strncmp(vmode, "normal", 6)) { >> - video_mode = 0xffff; >> - } else if (!strncmp(vmode, "ext", 3)) { >> - video_mode = 0xfffe; >> - } else if (!strncmp(vmode, "ask", 3)) { >> - video_mode = 0xfffd; >> - } else { >> - video_mode = strtol(vmode, NULL, 0); >> - } >> - stw_p(header+0x1fa, video_mode); >> - } >> - >> - /* loader type */ >> - /* High nybble = B reserved for QEMU; low nybble is revision number. >> - If this code is substantially changed, you may want to consider >> - incrementing the revision. 
*/ >> - if (protocol >= 0x200) { >> - header[0x210] = 0xB0; >> - } >> - /* heap */ >> - if (protocol >= 0x201) { >> - header[0x211] |= 0x80; /* CAN_USE_HEAP */ >> - stw_p(header+0x224, cmdline_addr-real_addr-0x200); >> - } >> - >> - /* load initrd */ >> - if (initrd_filename) { >> - GMappedFile *mapped_file; >> - gsize initrd_size; >> - gchar *initrd_data; >> - GError *gerr = NULL; >> - >> - if (protocol < 0x200) { >> - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); >> - exit(1); >> - } >> - >> - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> - if (!mapped_file) { >> - fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> - initrd_filename, gerr->message); >> - exit(1); >> - } >> - pcms->initrd_mapped_file = mapped_file; >> - >> - initrd_data = g_mapped_file_get_contents(mapped_file); >> - initrd_size = g_mapped_file_get_length(mapped_file); >> - if (initrd_size >= initrd_max) { >> - fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> - "(max: %"PRIu32", need %"PRId64")\n", >> - initrd_max, (uint64_t)initrd_size); >> - exit(1); >> - } >> - >> - initrd_addr = (initrd_max-initrd_size) & ~4095; >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); >> - >> - stl_p(header+0x218, initrd_addr); >> - stl_p(header+0x21c, initrd_size); >> - } >> - >> - /* load kernel and setup */ >> - setup_size = header[0x1f1]; >> - if (setup_size == 0) { >> - setup_size = 4; >> - } >> - setup_size = (setup_size+1)*512; >> - if (setup_size > kernel_size) { >> - fprintf(stderr, "qemu: invalid kernel header\n"); >> - exit(1); >> - } >> - kernel_size -= setup_size; >> - >> - setup = g_malloc(setup_size); >> - kernel = g_malloc(kernel_size); >> - fseek(f, 0, SEEK_SET); >> - if (fread(setup, 1, setup_size, f) != setup_size) { >> - fprintf(stderr, "fread() failed\n"); >> - exit(1); >> - } >> - if (fread(kernel, 1, kernel_size, f) != kernel_size) { >> - fprintf(stderr, "fread() failed\n"); >> - exit(1); >> - } >> - fclose(f); >> - >> - /* append dtb to kernel */ >> - if (dtb_filename) { >> - if (protocol < 0x209) { >> - fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); >> - exit(1); >> - } >> - >> - dtb_size = get_image_size(dtb_filename); >> - if (dtb_size <= 0) { >> - fprintf(stderr, "qemu: error reading dtb %s: %s\n", >> - dtb_filename, strerror(errno)); >> - exit(1); >> - } >> - >> - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); >> - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; >> - kernel = g_realloc(kernel, kernel_size); >> - >> - stq_p(header+0x250, prot_addr + setup_data_offset); >> - >> - setup_data = (struct setup_data *)(kernel + setup_data_offset); >> - setup_data->next = 0; >> - setup_data->type = cpu_to_le32(SETUP_DTB); >> - setup_data->len = cpu_to_le32(dtb_size); >> - >> - load_image_size(dtb_filename, 
setup_data->data, dtb_size); >> - } >> - >> - memcpy(setup, header, MIN(sizeof(header), setup_size)); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); >> - >> - option_rom[nb_option_roms].bootindex = 0; >> - option_rom[nb_option_roms].name = "linuxboot.bin"; >> - if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { >> - option_rom[nb_option_roms].name = "linuxboot_dma.bin"; >> - } >> - nb_option_roms++; >> -} >> - >> #define NE2000_NB_MAX 6 >> static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, >> 0x340, 0x360, >> @@ -1374,24 +900,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) >> } >> } >> -static void x86_new_cpu(PCMachineState *pcms, int64_t apic_id, >> Error **errp) >> -{ >> - Object *cpu = NULL; >> - Error *local_err = NULL; >> - CPUX86State *env = NULL; >> - >> - cpu = object_new(MACHINE(pcms)->cpu_type); >> - >> - env = &X86_CPU(cpu)->env; >> - env->nr_dies = pcms->smp_dies; >> - >> - object_property_set_uint(cpu, apic_id, "apic-id", &local_err); >> - object_property_set_bool(cpu, true, "realized", &local_err); >> - >> - object_unref(cpu); >> - error_propagate(errp, local_err); >> -} >> - >> /* >> * This function is very similar to smp_parse() >> * in hw/core/machine.c but includes CPU die support. 
>> @@ -1497,31 +1005,6 @@ void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) >> } >> } >> -void x86_cpus_init(PCMachineState *pcms) >> -{ >> - int i; >> - const CPUArchIdList *possible_cpus; >> - MachineState *ms = MACHINE(pcms); >> - MachineClass *mc = MACHINE_GET_CLASS(pcms); >> - PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); >> - >> - x86_cpu_set_default_version(pcmc->default_cpu_version); >> - >> - /* Calculates the limit to CPU APIC ID values >> - * >> - * Limit for the APIC ID value, so that all >> - * CPU APIC IDs are < pcms->apic_id_limit. >> - * >> - * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). >> - */ >> - pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, >> - ms->smp.max_cpus - 1) + 1; >> - possible_cpus = mc->possible_cpu_arch_ids(ms); >> - for (i = 0; i < ms->smp.cpus; i++) { >> - x86_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); >> - } >> -} >> - >> static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) >> { >> if (cpus_count > 0xff) { >> @@ -2677,69 +2160,6 @@ static void pc_machine_wakeup(MachineState *machine) >> cpu_synchronize_all_post_reset(); >> } >> -static CpuInstanceProperties >> -x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) >> -{ >> - MachineClass *mc = MACHINE_GET_CLASS(ms); >> - const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); >> - >> - assert(cpu_index < possible_cpus->len); >> - return possible_cpus->cpus[cpu_index].props; >> -} >> - >> -static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) >> -{ >> - X86CPUTopoInfo topo; >> - PCMachineState *pcms = PC_MACHINE(ms); >> - >> - assert(idx < ms->possible_cpus->len); >> - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, >> - pcms->smp_dies, ms->smp.cores, >> - ms->smp.threads, &topo); >> - return topo.pkg_id % ms->numa_state->num_nodes; >> -} >> - >> -static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) >> -{ >> - 
PCMachineState *pcms = PC_MACHINE(ms); >> - int i; >> - unsigned int max_cpus = ms->smp.max_cpus; >> - >> - if (ms->possible_cpus) { >> - /* >> - * make sure that max_cpus hasn't changed since the first use, i.e. >> - * -smp hasn't been parsed after it >> - */ >> - assert(ms->possible_cpus->len == max_cpus); >> - return ms->possible_cpus; >> - } >> - >> - ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + >> - sizeof(CPUArchId) * max_cpus); >> - ms->possible_cpus->len = max_cpus; >> - for (i = 0; i < ms->possible_cpus->len; i++) { >> - X86CPUTopoInfo topo; >> - >> - ms->possible_cpus->cpus[i].type = ms->cpu_type; >> - ms->possible_cpus->cpus[i].vcpus_count = 1; >> - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); >> - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, >> - pcms->smp_dies, ms->smp.cores, >> - ms->smp.threads, &topo); >> - ms->possible_cpus->cpus[i].props.has_socket_id = true; >> - ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; >> - if (pcms->smp_dies > 1) { >> - ms->possible_cpus->cpus[i].props.has_die_id = true; >> - ms->possible_cpus->cpus[i].props.die_id = topo.die_id; >> - } >> - ms->possible_cpus->cpus[i].props.has_core_id = true; >> - ms->possible_cpus->cpus[i].props.core_id = topo.core_id; >> - ms->possible_cpus->cpus[i].props.has_thread_id = true; >> - ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; >> - } >> - return ms->possible_cpus; >> -} >> - >> static void x86_nmi(NMIState *n, int cpu_index, Error **errp) >> { >> /* cpu index isn't used */ >> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c >> index de09e076cd..c8afe46e37 100644 >> --- a/hw/i386/pc_piix.c >> +++ b/hw/i386/pc_piix.c >> @@ -28,6 +28,7 @@ >> #include "qemu/units.h" >> #include "hw/loader.h" >> #include "hw/i386/pc.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/apic.h" >> #include "hw/display/ramfb.h" >> #include "hw/firmware/smbios.h" >> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c >> index 
894989b64e..c87653eb6a 100644 >> --- a/hw/i386/pc_q35.c >> +++ b/hw/i386/pc_q35.c >> @@ -42,6 +42,7 @@ >> #include "hw/qdev-properties.h" >> #include "exec/address-spaces.h" >> #include "hw/i386/pc.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/ich9.h" >> #include "hw/i386/amd_iommu.h" >> #include "hw/i386/intel_iommu.h" >> diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c >> index 1ee254b15e..6d2e693179 100644 >> --- a/hw/i386/pc_sysfw.c >> +++ b/hw/i386/pc_sysfw.c >> @@ -32,6 +32,7 @@ >> #include "qemu/units.h" >> #include "hw/sysbus.h" >> #include "hw/i386/pc.h" >> +#include "hw/i386/x86.h" >> #include "hw/loader.h" >> #include "hw/qdev-properties.h" >> #include "sysemu/sysemu.h" >> @@ -211,59 +212,6 @@ static void pc_system_flash_map(PCMachineState *pcms, >> } >> } >> -static void x86_system_rom_init(MemoryRegion *rom_memory, bool >> isapc_ram_fw) >> -{ >> - char *filename; >> - MemoryRegion *bios, *isa_bios; >> - int bios_size, isa_bios_size; >> - int ret; >> - >> - /* BIOS load */ >> - if (bios_name == NULL) { >> - bios_name = BIOS_FILENAME; >> - } >> - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); >> - if (filename) { >> - bios_size = get_image_size(filename); >> - } else { >> - bios_size = -1; >> - } >> - if (bios_size <= 0 || >> - (bios_size % 65536) != 0) { >> - goto bios_error; >> - } >> - bios = g_malloc(sizeof(*bios)); >> - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); >> - if (!isapc_ram_fw) { >> - memory_region_set_readonly(bios, true); >> - } >> - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); >> - if (ret != 0) { >> - bios_error: >> - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); >> - exit(1); >> - } >> - g_free(filename); >> - >> - /* map the last 128KB of the BIOS in ISA space */ >> - isa_bios_size = MIN(bios_size, 128 * KiB); >> - isa_bios = g_malloc(sizeof(*isa_bios)); >> - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, >> - bios_size - 
isa_bios_size, isa_bios_size); >> - memory_region_add_subregion_overlap(rom_memory, >> - 0x100000 - isa_bios_size, >> - isa_bios, >> - 1); >> - if (!isapc_ram_fw) { >> - memory_region_set_readonly(isa_bios, true); >> - } >> - >> - /* map all the bios at the top of memory */ >> - memory_region_add_subregion(rom_memory, >> - (uint32_t)(-bios_size), >> - bios); >> -} >> - >> void pc_system_firmware_init(PCMachineState *pcms, >> MemoryRegion *rom_memory) >> { >> diff --git a/hw/i386/x86.c b/hw/i386/x86.c >> new file mode 100644 >> index 0000000000..a9dee67890 >> --- /dev/null >> +++ b/hw/i386/x86.c >> @@ -0,0 +1,684 @@ >> +/* >> + * Copyright (c) 2003-2004 Fabrice Bellard >> + * Copyright (c) 2019 Red Hat, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a copy >> + * of this software and associated documentation files (the "Software"), to deal >> + * in the Software without restriction, including without limitation the rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell >> + * copies of the Software, and to permit persons to whom the Software is >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> + * THE SOFTWARE. 
>> + */ >> +#include "qemu/osdep.h" >> +#include "qemu/error-report.h" >> +#include "qemu/option.h" >> +#include "qemu/cutils.h" >> +#include "qemu/units.h" >> +#include "qemu-common.h" >> +#include "qapi/error.h" >> +#include "qapi/qmp/qerror.h" >> +#include "qapi/qapi-visit-common.h" >> +#include "qapi/visitor.h" >> +#include "sysemu/qtest.h" >> +#include "sysemu/numa.h" >> +#include "sysemu/replay.h" >> +#include "sysemu/sysemu.h" >> + >> +#include "hw/i386/x86.h" >> +#include "hw/i386/pc.h" >> +#include "target/i386/cpu.h" >> +#include "hw/i386/topology.h" >> +#include "hw/i386/fw_cfg.h" >> + >> +#include "hw/acpi/cpu_hotplug.h" >> +#include "hw/nmi.h" >> +#include "hw/loader.h" >> +#include "multiboot.h" >> +#include "elf.h" >> +#include "standard-headers/asm-x86/bootparam.h" >> + >> +#define BIOS_FILENAME "bios.bin" >> + >> +/* Physical Address of PVH entry point read from kernel ELF NOTE */ >> +static size_t pvh_start_addr; >> + >> +/* Calculates initial APIC ID for a specific CPU index >> + * >> + * Currently we need to be able to calculate the APIC ID from the CPU index >> + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have >> + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of >> + * all CPUs up to max_cpus. 
>> + */ >> +uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, >> + unsigned int cpu_index) >> +{ >> + MachineState *ms = MACHINE(pcms); >> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + uint32_t correct_id; >> + static bool warned; >> + >> + correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, >> + ms->smp.threads, cpu_index); >> + if (pcmc->compat_apic_id_mode) { >> + if (cpu_index != correct_id && !warned && !qtest_enabled()) { >> + error_report("APIC IDs set in compatibility mode, " >> + "CPU topology won't match the configuration"); >> + warned = true; >> + } >> + return cpu_index; >> + } else { >> + return correct_id; >> + } >> +} >> + >> +void x86_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp) >> +{ >> + Object *cpu = NULL; >> + Error *local_err = NULL; >> + CPUX86State *env = NULL; >> + >> + cpu = object_new(MACHINE(pcms)->cpu_type); >> + >> + env = &X86_CPU(cpu)->env; >> + env->nr_dies = pcms->smp_dies; >> + >> + object_property_set_uint(cpu, apic_id, "apic-id", &local_err); >> + object_property_set_bool(cpu, true, "realized", &local_err); >> + >> + object_unref(cpu); >> + error_propagate(errp, local_err); >> +} >> + >> +void x86_cpus_init(PCMachineState *pcms) >> +{ >> + int i; >> + const CPUArchIdList *possible_cpus; >> + MachineState *ms = MACHINE(pcms); >> + MachineClass *mc = MACHINE_GET_CLASS(pcms); >> + PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); >> + >> + x86_cpu_set_default_version(pcmc->default_cpu_version); >> + >> + /* Calculates the limit to CPU APIC ID values >> + * >> + * Limit for the APIC ID value, so that all >> + * CPU APIC IDs are < pcms->apic_id_limit. >> + * >> + * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). 
>> + */ >> + pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, >> + ms->smp.max_cpus - 1) + 1; >> + possible_cpus = mc->possible_cpu_arch_ids(ms); >> + for (i = 0; i < ms->smp.cpus; i++) { >> + x86_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); >> + } >> +} >> + >> +CpuInstanceProperties >> +x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) >> +{ >> + MachineClass *mc = MACHINE_GET_CLASS(ms); >> + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); >> + >> + assert(cpu_index < possible_cpus->len); >> + return possible_cpus->cpus[cpu_index].props; >> +} >> + >> +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) >> +{ >> + X86CPUTopoInfo topo; >> + PCMachineState *pcms = PC_MACHINE(ms); >> + >> + assert(idx < ms->possible_cpus->len); >> + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, >> + pcms->smp_dies, ms->smp.cores, >> + ms->smp.threads, &topo); >> + return topo.pkg_id % ms->numa_state->num_nodes; >> +} >> + >> +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) >> +{ >> + PCMachineState *pcms = PC_MACHINE(ms); >> + int i; >> + unsigned int max_cpus = ms->smp.max_cpus; >> + >> + if (ms->possible_cpus) { >> + /* >> + * make sure that max_cpus hasn't changed since the first use, i.e. 
>> + * -smp hasn't been parsed after it >> + */ >> + assert(ms->possible_cpus->len == max_cpus); >> + return ms->possible_cpus; >> + } >> + >> + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + >> + sizeof(CPUArchId) * max_cpus); >> + ms->possible_cpus->len = max_cpus; >> + for (i = 0; i < ms->possible_cpus->len; i++) { >> + X86CPUTopoInfo topo; >> + >> + ms->possible_cpus->cpus[i].type = ms->cpu_type; >> + ms->possible_cpus->cpus[i].vcpus_count = 1; >> + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); >> + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, >> + pcms->smp_dies, ms->smp.cores, >> + ms->smp.threads, &topo); >> + ms->possible_cpus->cpus[i].props.has_socket_id = true; >> + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; >> + if (pcms->smp_dies > 1) { >> + ms->possible_cpus->cpus[i].props.has_die_id = true; >> + ms->possible_cpus->cpus[i].props.die_id = topo.die_id; >> + } >> + ms->possible_cpus->cpus[i].props.has_core_id = true; >> + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; >> + ms->possible_cpus->cpus[i].props.has_thread_id = true; >> + ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; >> + } >> + return ms->possible_cpus; >> +} >> + >> +static long get_file_size(FILE *f) >> +{ >> + long where, size; >> + >> + /* XXX: on Unix systems, using fstat() probably makes more sense */ >> + >> + where = ftell(f); >> + fseek(f, 0, SEEK_END); >> + size = ftell(f); >> + fseek(f, where, SEEK_SET); >> + >> + return size; >> +} >> + >> +struct setup_data { >> + uint64_t next; >> + uint32_t type; >> + uint32_t len; >> + uint8_t data[0]; >> +} __attribute__((packed)); >> + >> +/* >> + * The entry point into the kernel for PVH boot is different from >> + * the native entry point. The PVH entry is defined by the x86/HVM >> + * direct boot ABI and is available in an ELFNOTE in the kernel binary. 
>> + * >> + * This function is passed to load_elf() when it is called from >> + * load_elfboot() which then additionally checks for an ELF Note of >> + * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to >> + * parse the PVH entry address from the ELF Note. >> + * >> + * Due to trickery in elf_opts.h, load_elf() is actually available as >> + * load_elf32() or load_elf64() and this routine needs to be able >> + * to deal with being called as 32 or 64 bit. >> + * >> + * The address of the PVH entry point is saved to the 'pvh_start_addr' >> + * global variable. (although the entry point is 32-bit, the kernel >> + * binary can be either 32-bit or 64-bit). >> + */ >> +static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) >> +{ >> + size_t *elf_note_data_addr; >> + >> + /* Check if ELF Note header passed in is valid */ >> + if (arg1 == NULL) { >> + return 0; >> + } >> + >> + if (is64) { >> + struct elf64_note *nhdr64 = (struct elf64_note *)arg1; >> + uint64_t nhdr_size64 = sizeof(struct elf64_note); >> + uint64_t phdr_align = *(uint64_t *)arg2; >> + uint64_t nhdr_namesz = nhdr64->n_namesz; >> + >> + elf_note_data_addr = >> + ((void *)nhdr64) + nhdr_size64 + >> + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); >> + } else { >> + struct elf32_note *nhdr32 = (struct elf32_note *)arg1; >> + uint32_t nhdr_size32 = sizeof(struct elf32_note); >> + uint32_t phdr_align = *(uint32_t *)arg2; >> + uint32_t nhdr_namesz = nhdr32->n_namesz; >> + >> + elf_note_data_addr = >> + ((void *)nhdr32) + nhdr_size32 + >> + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); >> + } >> + >> + pvh_start_addr = *elf_note_data_addr; >> + >> + return pvh_start_addr; >> +} >> + >> +static bool load_elfboot(const char *kernel_filename, >> + int kernel_file_size, >> + uint8_t *header, >> + size_t pvh_xen_start_addr, >> + FWCfgState *fw_cfg) >> +{ >> + uint32_t flags = 0; >> + uint32_t mh_load_addr = 0; >> + uint32_t elf_kernel_size = 0; >> + uint64_t elf_entry; >> + uint64_t elf_low, elf_high; 
>> + int kernel_size; >> + >> + if (ldl_p(header) != 0x464c457f) { >> + return false; /* no elfboot */ >> + } >> + >> + bool elf_is64 = header[EI_CLASS] == ELFCLASS64; >> + flags = elf_is64 ? >> + ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; >> + >> + if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ >> + error_report("elfboot unsupported flags = %x", flags); >> + exit(1); >> + } >> + >> + uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; >> + kernel_size = load_elf(kernel_filename, read_pvh_start_addr, >> + NULL, &elf_note_type, &elf_entry, >> + &elf_low, &elf_high, 0, I386_ELF_MACHINE, >> + 0, 0); >> + >> + if (kernel_size < 0) { >> + error_report("Error while loading elf kernel"); >> + exit(1); >> + } >> + mh_load_addr = elf_low; >> + elf_kernel_size = elf_high - elf_low; >> + >> + if (pvh_start_addr == 0) { >> + error_report("Error loading uncompressed kernel without PVH ELF Note"); >> + exit(1); >> + } >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); >> + >> + return true; >> +} >> + >> +void x86_load_linux(PCMachineState *pcms, >> + FWCfgState *fw_cfg) >> +{ >> + uint16_t protocol; >> + int setup_size, kernel_size, cmdline_size; >> + int dtb_size, setup_data_offset; >> + uint32_t initrd_max; >> + uint8_t header[8192], *setup, *kernel; >> + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; >> + FILE *f; >> + char *vmode; >> + MachineState *machine = MACHINE(pcms); >> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + struct setup_data *setup_data; >> + const char *kernel_filename = machine->kernel_filename; >> + const char *initrd_filename = machine->initrd_filename; >> + const char *dtb_filename = machine->dtb; >> + const char *kernel_cmdline = machine->kernel_cmdline; >> + >> + /* Align to 16 bytes as a paranoia measure */ >> + cmdline_size = 
(strlen(kernel_cmdline)+16) & ~15; >> + >> + /* load the kernel header */ >> + f = fopen(kernel_filename, "rb"); >> + if (!f || !(kernel_size = get_file_size(f)) || >> + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != >> + MIN(ARRAY_SIZE(header), kernel_size)) { >> + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", >> + kernel_filename, strerror(errno)); >> + exit(1); >> + } >> + >> + /* kernel protocol version */ >> +#if 0 >> + fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); >> +#endif >> + if (ldl_p(header+0x202) == 0x53726448) { >> + protocol = lduw_p(header+0x206); >> + } else { >> + /* >> + * This could be a multiboot kernel. If it is, let's stop treating it >> + * like a Linux kernel. >> + * Note: some multiboot images could be in the ELF format (the same of >> + * PVH), so we try multiboot first since we check the multiboot magic >> + * header before to load it. >> + */ >> + if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, >> + kernel_cmdline, kernel_size, header)) { >> + return; >> + } >> + /* >> + * Check if the file is an uncompressed kernel file (ELF) and load it, >> + * saving the PVH entry point used by the x86/HVM direct boot ABI. >> + * If load_elfboot() is successful, populate the fw_cfg info. 
>> + */ >> + if (pcmc->pvh_enabled && >> + load_elfboot(kernel_filename, kernel_size, >> + header, pvh_start_addr, fw_cfg)) { >> + fclose(f); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, >> + strlen(kernel_cmdline) + 1); >> + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, >> + header, sizeof(header)); >> + >> + /* load initrd */ >> + if (initrd_filename) { >> + GMappedFile *mapped_file; >> + gsize initrd_size; >> + gchar *initrd_data; >> + GError *gerr = NULL; >> + >> + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> + if (!mapped_file) { >> + fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> + initrd_filename, gerr->message); >> + exit(1); >> + } >> + pcms->initrd_mapped_file = mapped_file; >> + >> + initrd_data = g_mapped_file_get_contents(mapped_file); >> + initrd_size = g_mapped_file_get_length(mapped_file); >> + initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; >> + if (initrd_size >= initrd_max) { >> + fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> + "(max: %"PRIu32", need %"PRId64")\n", >> + initrd_max, (uint64_t)initrd_size); >> + exit(1); >> + } >> + >> + initrd_addr = (initrd_max - initrd_size) & ~4095; >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, >> + initrd_size); >> + } >> + >> + option_rom[nb_option_roms].bootindex = 0; >> + option_rom[nb_option_roms].name = "pvh.bin"; >> + nb_option_roms++; >> + >> + return; >> + } >> + protocol = 0; >> + } >> + >> + if (protocol < 0x200 || !(header[0x211] & 0x01)) { >> + /* Low kernel */ >> + real_addr = 0x90000; >> + cmdline_addr = 0x9a000 - cmdline_size; >> + prot_addr = 0x10000; >> + } else if (protocol < 0x202) { >> + /* High but ancient kernel */ >> + real_addr = 0x90000; >> + cmdline_addr = 0x9a000 - cmdline_size; >> + prot_addr = 0x100000; >> + } else { >> + /* High and recent kernel */ >> + real_addr = 0x10000; >> + cmdline_addr = 0x20000; >> + prot_addr = 0x100000; >> + } >> + >> +#if 0 >> + fprintf(stderr, >> + "qemu: real_addr = 0x" TARGET_FMT_plx "\n" >> + "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" >> + "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", >> + real_addr, >> + cmdline_addr, >> + prot_addr); >> +#endif >> + >> + /* highest address for loading the initrd */ >> + if (protocol >= 0x20c && >> + lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { >> + /* >> + * Linux has supported initrd up to 4 GB for a very long time (2007, >> + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), >> + * though it only sets initrd_max to 2 GB to "work around bootloader >> + * bugs". Luckily, QEMU firmware(which does something like bootloader) >> + * has supported this. >> + * >> + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can >> + * be loaded into any address. 
>> + * >> + * In addition, initrd_max is uint32_t simply because QEMU doesn't >> + * support the 64-bit boot protocol (specifically the ext_ramdisk_image >> + * field). >> + * >> + * Therefore here just limit initrd_max to UINT32_MAX simply as well. >> + */ >> + initrd_max = UINT32_MAX; >> + } else if (protocol >= 0x203) { >> + initrd_max = ldl_p(header+0x22c); >> + } else { >> + initrd_max = 0x37ffffff; >> + } >> + >> + if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) { >> + initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; >> + } >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); >> + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> + >> + if (protocol >= 0x202) { >> + stl_p(header+0x228, cmdline_addr); >> + } else { >> + stw_p(header+0x20, 0xA33F); >> + stw_p(header+0x22, cmdline_addr-real_addr); >> + } >> + >> + /* handle vga= parameter */ >> + vmode = strstr(kernel_cmdline, "vga="); >> + if (vmode) { >> + unsigned int video_mode; >> + /* skip "vga=" */ >> + vmode += 4; >> + if (!strncmp(vmode, "normal", 6)) { >> + video_mode = 0xffff; >> + } else if (!strncmp(vmode, "ext", 3)) { >> + video_mode = 0xfffe; >> + } else if (!strncmp(vmode, "ask", 3)) { >> + video_mode = 0xfffd; >> + } else { >> + video_mode = strtol(vmode, NULL, 0); >> + } >> + stw_p(header+0x1fa, video_mode); >> + } >> + >> + /* loader type */ >> + /* High nybble = B reserved for QEMU; low nybble is revision number. >> + If this code is substantially changed, you may want to consider >> + incrementing the revision. 
*/ >> + if (protocol >= 0x200) { >> + header[0x210] = 0xB0; >> + } >> + /* heap */ >> + if (protocol >= 0x201) { >> + header[0x211] |= 0x80; /* CAN_USE_HEAP */ >> + stw_p(header+0x224, cmdline_addr-real_addr-0x200); >> + } >> + >> + /* load initrd */ >> + if (initrd_filename) { >> + GMappedFile *mapped_file; >> + gsize initrd_size; >> + gchar *initrd_data; >> + GError *gerr = NULL; >> + >> + if (protocol < 0x200) { >> + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); >> + exit(1); >> + } >> + >> + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> + if (!mapped_file) { >> + fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> + initrd_filename, gerr->message); >> + exit(1); >> + } >> + pcms->initrd_mapped_file = mapped_file; >> + >> + initrd_data = g_mapped_file_get_contents(mapped_file); >> + initrd_size = g_mapped_file_get_length(mapped_file); >> + if (initrd_size >= initrd_max) { >> + fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> + "(max: %"PRIu32", need %"PRId64")\n", >> + initrd_max, (uint64_t)initrd_size); >> + exit(1); >> + } >> + >> + initrd_addr = (initrd_max-initrd_size) & ~4095; >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); >> + >> + stl_p(header+0x218, initrd_addr); >> + stl_p(header+0x21c, initrd_size); >> + } >> + >> + /* load kernel and setup */ >> + setup_size = header[0x1f1]; >> + if (setup_size == 0) { >> + setup_size = 4; >> + } >> + setup_size = (setup_size+1)*512; >> + if (setup_size > kernel_size) { >> + fprintf(stderr, "qemu: invalid kernel header\n"); >> + exit(1); >> + } >> + kernel_size -= setup_size; >> + >> + setup = g_malloc(setup_size); >> + kernel = g_malloc(kernel_size); >> + fseek(f, 0, SEEK_SET); >> + if (fread(setup, 1, setup_size, f) != setup_size) { >> + fprintf(stderr, "fread() failed\n"); >> + exit(1); >> + } >> + if (fread(kernel, 1, kernel_size, f) != kernel_size) { >> + fprintf(stderr, "fread() failed\n"); >> + exit(1); >> + } >> + fclose(f); >> + >> + /* append dtb to kernel */ >> + if (dtb_filename) { >> + if (protocol < 0x209) { >> + fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); >> + exit(1); >> + } >> + >> + dtb_size = get_image_size(dtb_filename); >> + if (dtb_size <= 0) { >> + fprintf(stderr, "qemu: error reading dtb %s: %s\n", >> + dtb_filename, strerror(errno)); >> + exit(1); >> + } >> + >> + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); >> + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; >> + kernel = g_realloc(kernel, kernel_size); >> + >> + stq_p(header+0x250, prot_addr + setup_data_offset); >> + >> + setup_data = (struct setup_data *)(kernel + setup_data_offset); >> + setup_data->next = 0; >> + setup_data->type = cpu_to_le32(SETUP_DTB); >> + setup_data->len = cpu_to_le32(dtb_size); >> + >> + load_image_size(dtb_filename, 
setup_data->data, dtb_size); >> + } >> + >> + memcpy(setup, header, MIN(sizeof(header), setup_size)); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); >> + >> + option_rom[nb_option_roms].bootindex = 0; >> + option_rom[nb_option_roms].name = "linuxboot.bin"; >> + if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { >> + option_rom[nb_option_roms].name = "linuxboot_dma.bin"; >> + } >> + nb_option_roms++; >> +} >> + >> +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) >> +{ >> + char *filename; >> + MemoryRegion *bios, *isa_bios; >> + int bios_size, isa_bios_size; >> + int ret; >> + >> + /* BIOS load */ >> + if (bios_name == NULL) { >> + bios_name = BIOS_FILENAME; >> + } >> + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); >> + if (filename) { >> + bios_size = get_image_size(filename); >> + } else { >> + bios_size = -1; >> + } >> + if (bios_size <= 0 || >> + (bios_size % 65536) != 0) { >> + goto bios_error; >> + } >> + bios = g_malloc(sizeof(*bios)); >> + memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); >> + if (!isapc_ram_fw) { >> + memory_region_set_readonly(bios, true); >> + } >> + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); >> + if (ret != 0) { >> + bios_error: >> + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); >> + exit(1); >> + } >> + g_free(filename); >> + >> + /* map the last 128KB of the BIOS in ISA space */ >> + isa_bios_size = MIN(bios_size, 128 * KiB); >> + isa_bios = g_malloc(sizeof(*isa_bios)); >> + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, >> + bios_size - isa_bios_size, isa_bios_size); >> + 
memory_region_add_subregion_overlap(rom_memory, >> + 0x100000 - isa_bios_size, >> + isa_bios, >> + 1); >> + if (!isapc_ram_fw) { >> + memory_region_set_readonly(isa_bios, true); >> + } >> + >> + /* map all the bios at the top of memory */ >> + memory_region_add_subregion(rom_memory, >> + (uint32_t)(-bios_size), >> + bios); >> +} >> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h >> index d12f42e9e5..73e2847e87 100644 >> --- a/include/hw/i386/pc.h >> +++ b/include/hw/i386/pc.h >> @@ -195,7 +195,6 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms); >> void pc_register_ferr_irq(qemu_irq irq); >> void pc_acpi_smi_interrupt(void *opaque, int irq, int level); >> -void x86_cpus_init(PCMachineState *pcms); >> void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); >> void pc_smp_parse(MachineState *ms, QemuOpts *opts); >> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h >> new file mode 100644 >> index 0000000000..bc1b594a93 >> --- /dev/null >> +++ b/include/hw/i386/x86.h >> @@ -0,0 +1,35 @@ >> +/* >> + * Copyright (c) 2019 Red Hat, Inc. >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms and conditions of the GNU General Public License, >> + * version 2 or later, as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope it will be useful, but WITHOUT >> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + * more details. >> + * >> + * You should have received a copy of the GNU General Public License along with >> + * this program. If not, see <http://www.gnu.org/licenses/>. 
>> + */ >> + >> +#ifndef HW_I386_X86_H >> +#define HW_I386_X86_H >> + >> +#include "hw/boards.h" >> + >> +uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, >> + unsigned int cpu_index); >> +void x86_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp); >> +void x86_cpus_init(PCMachineState *pcms); >> +CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, >> + unsigned cpu_index); >> +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); >> +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); >> + >> +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw); >> + >> +void x86_load_linux(PCMachineState *x86ms, FWCfgState *fw_cfg); >> + >> +#endif >>