From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1LCamk-0005vJ-J9 for qemu-devel@nongnu.org; Tue, 16 Dec 2008 09:20:30 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1LCamj-0005ud-H0 for qemu-devel@nongnu.org; Tue, 16 Dec 2008 09:20:29 -0500 Received: from [199.232.76.173] (port=56169 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1LCamj-0005uV-1l for qemu-devel@nongnu.org; Tue, 16 Dec 2008 09:20:29 -0500 Received: from outbound-dub.frontbridge.com ([213.199.154.16]:12117 helo=IE1EHSOBE003.bigfish.com) by monty-python.gnu.org with esmtps (TLS-1.0:RSA_ARCFOUR_MD5:16) (Exim 4.60) (envelope-from ) id 1LCami-0006ue-Ec for qemu-devel@nongnu.org; Tue, 16 Dec 2008 09:20:28 -0500 Message-ID: <4947B94F.4030206@amd.com> Date: Tue, 16 Dec 2008 15:21:03 +0100 From: Andre Przywara MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------030708030708000802010305" Subject: [Qemu-devel] [PATCH 8/8] v2: add SRAT generation to BIOS Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Anthony Liguori Cc: qemu-devel@nongnu.org, Avi Kivity --------------030708030708000802010305 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit Signed-off-by: Andre Przywara -- Andre Przywara AMD-Operating System Research Center (OSRC), Dresden, Germany Tel: +49 351 277-84917 ----to satisfy European Law for business letters: AMD Saxony Limited Liability Company & Co. KG, Wilschdorfer Landstr. 101, 01109 Dresden, Germany Register Court Dresden: HRA 4896, General Partner authorized to represent: AMD Saxony LLC (Wilmington, Delaware, US) General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy --------------030708030708000802010305 Content-Type: text/x-patch; name="qemunuma_v2_bios_srat.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="qemunuma_v2_bios_srat.patch" # HG changeset patch # User Andre Przywara # Date 1229435604 -3600 # Node ID eca1bcd2031b23e6ac744777571d63cff65126eb # Parent 4f0a8ac2d88ffffc1dcd82785c1620553baa86da add SRAT table generation to BIOS (preliminary patch) diff -r 4f0a8ac2d88f -r eca1bcd2031b pc-bios/bios.diff --- a/pc-bios/bios.diff Tue Dec 16 14:52:48 2008 +0100 +++ b/pc-bios/bios.diff Tue Dec 16 14:53:24 2008 +0100 @@ -130,7 +130,102 @@ regs.u.r32.ecx = 0x14; --- bochs-2.3.7.orig/bios/rombios32.c +++ bochs-2.3.7/bios/rombios32.c -@@ -479,7 +479,12 @@ +@@ -393,6 +393,75 @@ + unsigned long bios_table_cur_addr; + unsigned long bios_table_end_addr; + ++static inline uint16_t le16_to_cpu(uint16_t x) ++{ ++ return x; ++} ++ ++static inline uint32_t le32_to_cpu(uint32_t x) ++{ ++ return x; ++} ++ ++static inline uint64_t le64_to_cpu(uint64_t x) ++{ ++ return x; ++} ++ ++#ifdef BX_QEMU ++#define QEMU_CFG_CTL_PORT 0x510 ++#define QEMU_CFG_DATA_PORT 0x511 ++#define QEMU_CFG_SIGNATURE 0x00 ++#define QEMU_CFG_ID 0x01 ++#define QEMU_CFG_UUID 0x02 ++#define QEMU_CFG_NUMA_NODES 0x07 ++#define QEMU_CFG_NUMA_VCPUS 0x08 ++#define QEMU_CFG_NUMA_MEM 0x09 ++ ++int qemu_cfg_port; ++ ++void qemu_cfg_select(int f) ++{ ++ outw(QEMU_CFG_CTL_PORT, f); ++} ++ ++int qemu_cfg_port_probe() ++{ ++ char *sig = "QEMU"; ++ int i; ++ ++ qemu_cfg_select(QEMU_CFG_SIGNATURE); ++ ++ for (i = 0; i < 4; i++) ++ if (inb(QEMU_CFG_DATA_PORT) != sig[i]) ++ return 0; ++ ++ return 1; ++} ++ ++void qemu_cfg_read(uint8_t *buf, int len) ++{ ++ while (len--) ++ *(buf++) = inb(QEMU_CFG_DATA_PORT); ++} ++ ++uint32_t qemu_cfg_get32 (void) ++{ ++ uint32_t ret; ++ ++ qemu_cfg_read ((uint8_t*)&ret, 4); ++ return le32_to_cpu (ret); ++} ++ ++uint64_t qemu_cfg_get64 (void) ++{ ++ uint64_t ret; ++ ++ qemu_cfg_read ((uint8_t*)&ret, 8); ++ return le64_to_cpu (ret); ++} ++#endif ++ + void uuid_probe(void) + { + #ifdef BX_QEMU +@@ -420,6 +489,18 @@ + } + } + ++int get_numa_nodes(void) ++{ ++ uint16_t nodes = 0; ++#ifdef BX_QEMU ++ if(qemu_cfg_port) { ++ qemu_cfg_select(QEMU_CFG_NUMA_NODES); ++ qemu_cfg_read((uint8_t*)&nodes, 2); ++ } ++#endif ++ return le16_to_cpu(nodes); ++} ++ + void cpu_probe(void) + { + uint32_t eax, ebx, ecx, edx; +@@ -479,7 +560,12 @@ sipi_vector = AP_BOOT_ADDR >> 12; writel(APIC_BASE + APIC_ICR_LOW, 0x000C4600 | sipi_vector); @@ -143,3 +238,242 @@ smp_cpus = readw((void *)CPU_COUNT_ADDR); } +@@ -1082,7 +1168,7 @@ + struct rsdt_descriptor_rev1 + { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ +- uint32_t table_offset_entry [3]; /* Array of pointers to other */ ++ uint32_t table_offset_entry [4]; /* Array of pointers to other */ + /* ACPI tables */ + }; + +@@ -1200,6 +1286,9 @@ + #define APIC_XRUPT_SOURCE 8 + #define APIC_RESERVED 9 /* 9 and greater are reserved */ + ++#define SRAT_PROCESSOR 0 ++#define SRAT_MEMORY 1 ++ + /* + * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) + */ +@@ -1207,6 +1296,40 @@ + uint8_t type; \ + uint8_t length; + ++/* ++ * SRAT (NUMA topology description) table ++ */ ++struct system_resource_affinity_table ++{ ++ ACPI_TABLE_HEADER_DEF ++ uint32_t reserved1; ++ uint32_t reserved2[2]; ++}; ++ ++struct srat_processor_affinity ++{ ++APIC_HEADER_DEF ++ uint8_t proximity_lo; ++ uint8_t local_apic_id; ++ uint32_t flags; ++ uint8_t local_sapic_eid; ++ uint8_t proximity_hi[3]; ++ uint32_t reserved; ++}; ++ ++struct srat_memory_affinity ++{ ++ APIC_HEADER_DEF ++ uint8_t proximity[4]; ++ uint16_t reserved1; ++ uint32_t base_addr_low,base_addr_high; ++ uint32_t length_low,length_high; ++ uint32_t reserved2; ++ uint32_t flags; ++ uint32_t reserved3[2]; ++}; ++ ++ + /* Sub-structures for MADT */ + + struct madt_processor_apic +@@ -1253,6 +1376,26 @@ + return (-sum) & 0xff; + } + ++static void read_config_numa_vcpus (uint32_t *nodes, int numnodes) ++{ ++#ifdef BX_QEMU ++uint64_t cpumask; ++int node,cpu; ++ ++ qemu_cfg_select (QEMU_CFG_NUMA_VCPUS); ++ for (node = 0; node < numnodes; node++) { ++ cpumask = qemu_cfg_get64(); ++ for (cpu = 0; cpu < 64; cpu++) { ++ if (cpumask == 0) break; ++ if (cpumask & 1) nodes[cpu]=node; ++ cpumask >>= 1; ++ } ++ } ++#endif ++ return; ++ ++} ++ + static void acpi_build_table_header(struct acpi_table_header *h, + char *sig, int len, uint8_t rev) + { +@@ -1328,6 +1471,21 @@ + return ssdt_ptr - ssdt; + } + ++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem, ++ uint64_t base, uint64_t len, int node, int enabled) ++{ ++ numamem->type = SRAT_MEMORY; ++ numamem->length = sizeof(*numamem); ++ memset (numamem->proximity, 0 ,4); ++ numamem->proximity[0] = node; ++ numamem->flags = cpu_to_le32(!!enabled); ++ numamem->base_addr_low = base & 0xFFFFFFFF; ++ numamem->base_addr_high = base >> 32; ++ numamem->length_low = len & 0xFFFFFFFF; ++ numamem->length_high = len >> 32; ++ return; ++} ++ + /* base_addr must be a multiple of 4KB */ + void acpi_bios_init(void) + { +@@ -1336,10 +1494,12 @@ + struct fadt_descriptor_rev1 *fadt; + struct facs_descriptor_rev1 *facs; + struct multiple_apic_table *madt; ++ struct system_resource_affinity_table *srat; + uint8_t *dsdt, *ssdt; + uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr; + uint32_t acpi_tables_size, madt_addr, madt_size; +- int i; ++ uint32_t srat_addr, srat_size; ++ int i, numanodes; + + /* reserve memory space for tables */ + #ifdef BX_USE_EBDA_TABLES +@@ -1375,6 +1535,21 @@ + ssdt = (void *)(addr); + addr += acpi_build_processor_ssdt(ssdt); + ++ numanodes = get_numa_nodes(); ++ if (numanodes > 0) { ++ addr = (addr + 7) & ~7; ++ srat_addr = addr; ++ srat_size = sizeof(*srat) + ++ sizeof(struct srat_processor_affinity) * smp_cpus + ++ sizeof(struct srat_memory_affinity) * (numanodes + 2); ++ srat = (void *)(addr); ++ addr += srat_size; ++ } else { ++ srat_addr = addr; ++ srat = (void*)(addr); ++ srat_size = 0; ++ } ++ + addr = (addr + 7) & ~7; + madt_addr = addr; + madt_size = sizeof(*madt) + +@@ -1405,8 +1580,10 @@ + rsdt->table_offset_entry[0] = cpu_to_le32(fadt_addr); + rsdt->table_offset_entry[1] = cpu_to_le32(madt_addr); + rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr); +- acpi_build_table_header((struct acpi_table_header *)rsdt, +- "RSDT", sizeof(*rsdt), 1); ++ if (numanodes > 0) ++ rsdt->table_offset_entry[3] = cpu_to_le32(srat_addr); ++ acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT", ++ sizeof(*rsdt) - (numanodes > 0? 0: sizeof(uint32_t)), 1); + + /* FADT */ + memset(fadt, 0, sizeof(*fadt)); +@@ -1466,6 +1643,69 @@ + acpi_build_table_header((struct acpi_table_header *)madt, + "APIC", madt_size, 1); + } ++ ++ /* SRAT */ ++#ifdef BX_QEMU ++ if (numanodes > 0) { ++ struct srat_processor_affinity *core; ++ struct srat_memory_affinity *numamem; ++ int slots; ++ uint64_t mem_len, mem_base, next_base = 0; ++ uint32_t nodes[64]; ++ ++ memset (srat, 0 , srat_size); ++ srat->reserved1=1; ++ ++ read_config_numa_vcpus (nodes, numanodes); ++ core = (void*)(srat + 1); ++ for (i = 0; i < smp_cpus; ++i) { ++ core->type = SRAT_PROCESSOR; ++ core->length = sizeof(*core); ++ core->local_apic_id = i; ++ core->proximity_lo = nodes[i]; ++ memset (core->proximity_hi, 0, 3); ++ core->local_sapic_eid = 0; ++ if (i < smp_cpus) ++ core->flags = cpu_to_le32(1); ++ else ++ core->flags = 0; ++ core++; ++ } ++ /* the memory map is a bit tricky, it contains at least one hole ++ from 640k-1M and possibly another one from 3.5G-4G. */ ++ numamem = (void*)core; slots = 0; ++ qemu_cfg_select (QEMU_CFG_NUMA_MEM); ++ acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); ++ next_base = 1024 * 1024; numamem++;slots++; ++ for (i = 1; i < numanodes + 1; ++i) { ++ mem_base = next_base; ++ mem_len = qemu_cfg_get64(); ++ if (i == 1) mem_len -= 1024 * 1024; ++ next_base = mem_base + mem_len; ++ ++ /* Cut out the PCI hole */ ++ if (mem_base <= ram_size && next_base > ram_size) { ++ mem_len -= next_base - ram_size; ++ if (mem_len > 0) { ++ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); ++ numamem++; slots++; ++ } ++ mem_base = 1ULL << 32; ++ mem_len = next_base - ram_size; ++ next_base += (1ULL << 32) - ram_size; ++ } ++ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); ++ numamem++; slots++; ++ } ++ for (; slots < numanodes + 2; slots++) { ++ acpi_build_srat_memory(numamem, 0, 0, 0, 0); ++ numamem++; ++ } ++ ++ acpi_build_table_header((struct acpi_table_header *)srat, ++ "SRAT", srat_size, 1); ++ } ++#endif + } + + /* SMBIOS entry point -- must be written to a 16-bit aligned address +@@ -1982,6 +2222,10 @@ + { + BX_INFO("Starting rombios32\n"); + ++ #ifdef BX_QEMU ++ qemu_cfg_port = qemu_cfg_port_probe(); ++ #endif ++ + ram_probe(); + + cpu_probe(); --------------030708030708000802010305--