From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1Loe1a-0007Sw-EP for qemu-devel@nongnu.org; Tue, 31 Mar 2009 09:29:06 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1Loe1W-0007RV-Jl for qemu-devel@nongnu.org; Tue, 31 Mar 2009 09:29:06 -0400 Received: from [199.232.76.173] (port=51911 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Loe1W-0007RM-8x for qemu-devel@nongnu.org; Tue, 31 Mar 2009 09:29:02 -0400 Received: from outbound-sin.frontbridge.com ([207.46.51.80]:33006 helo=SG2EHSOBE002.bigfish.com) by monty-python.gnu.org with esmtps (TLS-1.0:RSA_ARCFOUR_MD5:16) (Exim 4.60) (envelope-from ) id 1Loe1S-0004YB-Hh for qemu-devel@nongnu.org; Tue, 31 Mar 2009 09:29:00 -0400 From: Andre Przywara Date: Tue, 31 Mar 2009 15:28:57 +0200 Message-ID: <1238506137-9140-5-git-send-email-andre.przywara@amd.com> In-Reply-To: <1238506137-9140-4-git-send-email-andre.przywara@amd.com> References: <1238506137-9140-1-git-send-email-andre.przywara@amd.com> <1238506137-9140-2-git-send-email-andre.przywara@amd.com> <1238506137-9140-3-git-send-email-andre.przywara@amd.com> <1238506137-9140-4-git-send-email-andre.przywara@amd.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [PATCH 4/4] add BIOS support for an ACPI SRAT table (needed for NUMA support) Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: Andre Przywara Signed-off-by: Andre Przywara --- .../bios-pq/0012_add-SRAT-ACPI-table-support.patch | 307 ++++++++++++++++++++ pc-bios/bios-pq/series | 1 + 2 files changed, 308 insertions(+), 0 deletions(-) create mode 100644 pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch diff --git a/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch b/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch new file mode 100644 index 0000000..a98c072 --- /dev/null +++ b/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch @@ -0,0 +1,307 @@ +From 40b77280bf956be2e1d5cbd6b2662e861b480112 Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Fri, 20 Mar 2009 00:35:06 +0100 +Subject: [PATCH] add SRAT ACPI table support + +Take NUMA topology info from the QEMU firmware configuration interface +(number of nodes, node for each (V)CPU and amount of memory) and build +a SRAT table describing this topology for the guest OS. Handles more than +4 GB of RAM by including a hole for 32bit PCI memory mapping. +--- + bios/rombios32.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++---- + 1 files changed, 164 insertions(+), 11 deletions(-) + +diff --git a/bios/rombios32.c b/bios/rombios32.c +index 7be4216..02379c0 100644 +--- a/bios/rombios32.c ++++ b/bios/rombios32.c +@@ -451,6 +451,11 @@ int pm_sci_int; + unsigned long bios_table_cur_addr; + unsigned long bios_table_end_addr; + ++static inline uint64_t le64_to_cpu(uint64_t x) ++{ ++ return x; ++} ++ + void wrmsr_smp(uint32_t index, uint64_t val) + { + static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR; +@@ -469,6 +474,7 @@ void wrmsr_smp(uint32_t index, uint64_t val) + #define QEMU_CFG_SIGNATURE 0x00 + #define QEMU_CFG_ID 0x01 + #define QEMU_CFG_UUID 0x02 ++#define QEMU_CFG_NUMA 0x0D + #define QEMU_CFG_ARCH_LOCAL 0x8000 + #define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) + +@@ -519,6 +525,14 @@ static int acpi_load_table(int i, uint32_t addr, uint16_t *len) + qemu_cfg_read((uint8_t*)addr, *len); + return 0; + } ++ ++uint64_t qemu_cfg_get64 (void) ++{ ++ uint64_t ret; ++ ++ qemu_cfg_read((uint8_t*)&ret, 8); ++ return le64_to_cpu(ret); ++} + #endif + + void uuid_probe(void) +@@ -1273,7 +1287,7 @@ struct rsdt_descriptor_rev1 + { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + #ifdef BX_QEMU +- uint32_t table_offset_entry [4]; /* Array of pointers to other */ ++ uint32_t table_offset_entry [5]; /* Array of pointers to other */ + #else + uint32_t table_offset_entry [3]; /* Array of pointers to other */ + #endif +@@ -1381,7 +1395,7 @@ struct multiple_apic_table + } __attribute__((__packed__)); + + +-/* Values for Type in APIC_HEADER_DEF */ ++/* Values for Type in APIC sub-headers */ + + #define APIC_PROCESSOR 0 + #define APIC_IO 1 +@@ -1394,18 +1408,18 @@ struct multiple_apic_table + #define APIC_XRUPT_SOURCE 8 + #define APIC_RESERVED 9 /* 9 and greater are reserved */ + +-/* +- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) +- */ +-#define APIC_HEADER_DEF /* Common APIC sub-structure header */\ ++#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ + uint8_t type; \ + uint8_t length; + ++/* ++ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) ++ */ + /* Sub-structures for MADT */ + + struct madt_processor_apic + { +- APIC_HEADER_DEF ++ ACPI_SUB_HEADER_DEF + uint8_t processor_id; /* ACPI processor id */ + uint8_t local_apic_id; /* Processor's local APIC id */ + #if 0 +@@ -1416,6 +1430,43 @@ struct madt_processor_apic + #endif + } __attribute__((__packed__)); + ++/* ++ * SRAT (NUMA topology description) table ++ */ ++ ++#define SRAT_PROCESSOR 0 ++#define SRAT_MEMORY 1 ++ ++struct system_resource_affinity_table ++{ ++ ACPI_TABLE_HEADER_DEF ++ uint32_t reserved1; ++ uint32_t reserved2[2]; ++}; ++ ++struct srat_processor_affinity ++{ ++ ACPI_SUB_HEADER_DEF ++ uint8_t proximity_lo; ++ uint8_t local_apic_id; ++ uint32_t flags; ++ uint8_t local_sapic_eid; ++ uint8_t proximity_hi[3]; ++ uint32_t reserved; ++}; ++ ++struct srat_memory_affinity ++{ ++ ACPI_SUB_HEADER_DEF ++ uint8_t proximity[4]; ++ uint16_t reserved1; ++ uint32_t base_addr_low,base_addr_high; ++ uint32_t length_low,length_high; ++ uint32_t reserved2; ++ uint32_t flags; ++ uint32_t reserved3[2]; ++}; ++ + #ifdef BX_QEMU + /* + * * ACPI 2.0 Generic Address Space definition. +@@ -1444,7 +1495,7 @@ struct acpi_20_hpet { + + struct madt_io_apic + { +- APIC_HEADER_DEF ++ ACPI_SUB_HEADER_DEF + uint8_t io_apic_id; /* I/O APIC ID */ + uint8_t reserved; /* Reserved - must be zero */ + uint32_t address; /* APIC physical address */ +@@ -1455,7 +1506,7 @@ struct madt_io_apic + #ifdef BX_QEMU + struct madt_int_override + { +- APIC_HEADER_DEF ++ ACPI_SUB_HEADER_DEF + uint8_t bus; /* Identifies ISA Bus */ + uint8_t source; /* Bus-relative interrupt source */ + uint32_t gsi; /* GSI that source will signal */ +@@ -1559,6 +1610,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt) + return ssdt_ptr - ssdt; + } + ++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem, ++ uint64_t base, uint64_t len, int node, int enabled) ++{ ++ numamem->type = SRAT_MEMORY; ++ numamem->length = sizeof(*numamem); ++ memset (numamem->proximity, 0 ,4); ++ numamem->proximity[0] = node; ++ numamem->flags = cpu_to_le32(!!enabled); ++ numamem->base_addr_low = base & 0xFFFFFFFF; ++ numamem->base_addr_high = base >> 32; ++ numamem->length_low = len & 0xFFFFFFFF; ++ numamem->length_high = len >> 32; ++ return; ++} ++ + /* base_addr must be a multiple of 4KB */ + void acpi_bios_init(void) + { +@@ -1569,12 +1635,15 @@ void acpi_bios_init(void) + struct multiple_apic_table *madt; + uint8_t *dsdt, *ssdt; + #ifdef BX_QEMU ++ struct system_resource_affinity_table *srat; + struct acpi_20_hpet *hpet; + uint32_t hpet_addr; + #endif + uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr; + uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size; ++ uint32_t srat_addr,srat_size; + uint16_t i, external_tables; ++ int nb_numa_nodes; + + /* reserve memory space for tables */ + #ifdef BX_USE_EBDA_TABLES +@@ -1616,6 +1685,25 @@ void acpi_bios_init(void) + ssdt_addr = addr; + ssdt = (void *)(addr); + addr += acpi_build_processor_ssdt(ssdt); ++#ifdef BX_QEMU ++ qemu_cfg_select(QEMU_CFG_NUMA); ++ nb_numa_nodes = qemu_cfg_get64(); ++#else ++ nb_numa_nodes = 0; ++#endif ++ if (nb_numa_nodes > 0) { ++ addr = (addr + 7) & ~7; ++ srat_addr = addr; ++ srat_size = sizeof(*srat) + ++ sizeof(struct srat_processor_affinity) * smp_cpus + ++ sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2); ++ srat = (void *)(addr); ++ addr += srat_size; ++ } else { ++ srat_addr = addr; ++ srat = (void*)(addr); ++ srat_size = 0; ++ } + + addr = (addr + 7) & ~7; + madt_addr = addr; +@@ -1725,6 +1813,69 @@ void acpi_bios_init(void) + + memset(rsdt, 0, rsdt_size); + #ifdef BX_QEMU ++ /* SRAT */ ++ if (nb_numa_nodes > 0) { ++ struct srat_processor_affinity *core; ++ struct srat_memory_affinity *numamem; ++ int slots; ++ uint64_t mem_len, mem_base, next_base = 0, curnode; ++ ++ qemu_cfg_select(QEMU_CFG_NUMA); ++ qemu_cfg_get64(); ++ memset (srat, 0 , srat_size); ++ srat->reserved1=1; ++ ++ core = (void*)(srat + 1); ++ for (i = 0; i < smp_cpus; ++i) { ++ core->type = SRAT_PROCESSOR; ++ core->length = sizeof(*core); ++ core->local_apic_id = i; ++ curnode = qemu_cfg_get64(); ++ core->proximity_lo = curnode; ++ memset (core->proximity_hi, 0, 3); ++ core->local_sapic_eid = 0; ++ if (i < smp_cpus) ++ core->flags = cpu_to_le32(1); ++ else ++ core->flags = 0; ++ core++; ++ } ++ ++ /* the memory map is a bit tricky, it contains at least one hole ++ * from 640k-1M and possibly another one from 3.5G-4G. ++ */ ++ numamem = (void*)core; slots = 0; ++ acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); ++ next_base = 1024 * 1024; numamem++;slots++; ++ for (i = 1; i < nb_numa_nodes + 1; ++i) { ++ mem_base = next_base; ++ mem_len = qemu_cfg_get64(); ++ if (i == 1) mem_len -= 1024 * 1024; ++ next_base = mem_base + mem_len; ++ ++ /* Cut out the PCI hole */ ++ if (mem_base <= ram_size && next_base > ram_size) { ++ mem_len -= next_base - ram_size; ++ if (mem_len > 0) { ++ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); ++ numamem++; slots++; ++ } ++ mem_base = 1ULL << 32; ++ mem_len = next_base - ram_size; ++ next_base += (1ULL << 32) - ram_size; ++ } ++ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); ++ numamem++; slots++; ++ } ++ for (; slots < nb_numa_nodes + 2; slots++) { ++ acpi_build_srat_memory(numamem, 0, 0, 0, 0); ++ numamem++; ++ } ++ ++ acpi_build_table_header((struct acpi_table_header *)srat, ++ "SRAT", srat_size, 1); ++ } ++ + /* HPET */ + memset(hpet, 0, sizeof(*hpet)); + /* Note timer_block_id value must be kept in sync with value advertised by +@@ -1753,9 +1904,11 @@ void acpi_bios_init(void) + rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr); + #ifdef BX_QEMU + rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr); ++ if (nb_numa_nodes > 0) ++ rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr); + #endif +- acpi_build_table_header((struct acpi_table_header *)rsdt, +- "RSDT", rsdt_size, 1); ++ acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT", ++ rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1); + + acpi_tables_size = addr - base_addr; + +-- +1.6.1.3 + diff --git a/pc-bios/bios-pq/series b/pc-bios/bios-pq/series index 5a29df9..bd04a1a 100644 --- a/pc-bios/bios-pq/series +++ b/pc-bios/bios-pq/series @@ -9,3 +9,4 @@ 0009_qemu-bios-pci-hotplug-support.patch 0010_bios-mark-the-acpi-sci-interrupt-as-connected-to-irq-9.patch 0011_read-additional-acpi-tables-from-a-vm.patch +0012_add-SRAT-ACPI-table-support.patch -- 1.6.1.3