All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andre Przywara <andre.przywara@amd.com>
To: qemu-devel@nongnu.org
Cc: Andre Przywara <andre.przywara@amd.com>
Subject: [Qemu-devel] [PATCH 4/4] add BIOS support for an ACPI SRAT table (needed for NUMA support)
Date: Tue, 31 Mar 2009 15:28:57 +0200	[thread overview]
Message-ID: <1238506137-9140-5-git-send-email-andre.przywara@amd.com> (raw)
In-Reply-To: <1238506137-9140-4-git-send-email-andre.przywara@amd.com>


Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
 .../bios-pq/0012_add-SRAT-ACPI-table-support.patch |  307 ++++++++++++++++++++
 pc-bios/bios-pq/series                             |    1 +
 2 files changed, 308 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch

diff --git a/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch b/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch
new file mode 100644
index 0000000..a98c072
--- /dev/null
+++ b/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch
@@ -0,0 +1,307 @@
+From 40b77280bf956be2e1d5cbd6b2662e861b480112 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <andre.przywara@amd.com>
+Date: Fri, 20 Mar 2009 00:35:06 +0100
+Subject: [PATCH] add SRAT ACPI table support
+
+Take NUMA topology info from the QEMU firmware configuration interface
+(number of nodes, node for each (V)CPU and amount of memory) and build
+an SRAT table describing this topology for the guest OS. Handles more than
+4 GB of RAM by including a hole for 32-bit PCI memory mapping.
+---
+ bios/rombios32.c |  175 ++++++++++++++++++++++++++++++++++++++++++++++++++----
+ 1 files changed, 164 insertions(+), 11 deletions(-)
+
+diff --git a/bios/rombios32.c b/bios/rombios32.c
+index 7be4216..02379c0 100644
+--- a/bios/rombios32.c
++++ b/bios/rombios32.c
+@@ -451,6 +451,11 @@ int pm_sci_int;
+ unsigned long bios_table_cur_addr;
+ unsigned long bios_table_end_addr;
+ 
++static inline uint64_t le64_to_cpu(uint64_t x)
++{
++    return x;
++}
++
+ void wrmsr_smp(uint32_t index, uint64_t val)
+ {
+     static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
+@@ -469,6 +474,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
+ #define QEMU_CFG_SIGNATURE  0x00
+ #define QEMU_CFG_ID         0x01
+ #define QEMU_CFG_UUID       0x02
++#define QEMU_CFG_NUMA       0x0D
+ #define QEMU_CFG_ARCH_LOCAL     0x8000
+ #define QEMU_CFG_ACPI_TABLES  (QEMU_CFG_ARCH_LOCAL + 0)
+ 
+@@ -519,6 +525,14 @@ static int acpi_load_table(int i, uint32_t addr, uint16_t *len)
+     qemu_cfg_read((uint8_t*)addr, *len);
+     return 0;
+ }
++
++uint64_t qemu_cfg_get64 (void)
++{
++    uint64_t ret;
++
++    qemu_cfg_read((uint8_t*)&ret, 8);
++    return le64_to_cpu(ret);
++}
+ #endif
+ 
+ void uuid_probe(void)
+@@ -1273,7 +1287,7 @@ struct rsdt_descriptor_rev1
+ {
+ 	ACPI_TABLE_HEADER_DEF                           /* ACPI common table header */
+ #ifdef BX_QEMU
+-	uint32_t                             table_offset_entry [4]; /* Array of pointers to other */
++	uint32_t                             table_offset_entry [5]; /* Array of pointers to other */
+ #else
+ 	uint32_t                             table_offset_entry [3]; /* Array of pointers to other */
+ #endif
+@@ -1381,7 +1395,7 @@ struct multiple_apic_table
+ } __attribute__((__packed__));
+ 
+ 
+-/* Values for Type in APIC_HEADER_DEF */
++/* Values for Type in APIC sub-headers */
+ 
+ #define APIC_PROCESSOR          0
+ #define APIC_IO                 1
+@@ -1394,18 +1408,18 @@ struct multiple_apic_table
+ #define APIC_XRUPT_SOURCE       8
+ #define APIC_RESERVED           9           /* 9 and greater are reserved */
+ 
+-/*
+- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
+- */
+-#define APIC_HEADER_DEF                     /* Common APIC sub-structure header */\
++#define ACPI_SUB_HEADER_DEF                 /* Common ACPI sub-structure header */\
+ 	uint8_t                              type; \
+ 	uint8_t                              length;
+ 
++/*
++ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
++ */
+ /* Sub-structures for MADT */
+ 
+ struct madt_processor_apic
+ {
+-	APIC_HEADER_DEF
++	ACPI_SUB_HEADER_DEF
+ 	uint8_t                              processor_id;           /* ACPI processor id */
+ 	uint8_t                              local_apic_id;          /* Processor's local APIC id */
+ #if 0
+@@ -1416,6 +1430,43 @@ struct madt_processor_apic
+ #endif
+ } __attribute__((__packed__));
+ 
++/*
++ * SRAT (NUMA topology description) table
++ */
++
++#define SRAT_PROCESSOR          0
++#define SRAT_MEMORY             1
++
++struct system_resource_affinity_table
++{
++    ACPI_TABLE_HEADER_DEF
++    uint32_t    reserved1;
++    uint32_t    reserved2[2];
++};
++
++struct srat_processor_affinity
++{
++    ACPI_SUB_HEADER_DEF
++    uint8_t     proximity_lo;
++    uint8_t     local_apic_id;
++    uint32_t    flags;
++    uint8_t     local_sapic_eid;
++    uint8_t     proximity_hi[3];
++    uint32_t    reserved;
++};
++
++struct srat_memory_affinity
++{
++    ACPI_SUB_HEADER_DEF
++    uint8_t     proximity[4];
++    uint16_t    reserved1;
++    uint32_t    base_addr_low,base_addr_high;
++    uint32_t    length_low,length_high;
++    uint32_t    reserved2;
++    uint32_t    flags;
++    uint32_t    reserved3[2];
++};
++
+ #ifdef BX_QEMU
+ /*
+  *  * ACPI 2.0 Generic Address Space definition.
+@@ -1444,7 +1495,7 @@ struct acpi_20_hpet {
+ 
+ struct madt_io_apic
+ {
+-	APIC_HEADER_DEF
++	ACPI_SUB_HEADER_DEF
+ 	uint8_t                              io_apic_id;             /* I/O APIC ID */
+ 	uint8_t                              reserved;               /* Reserved - must be zero */
+ 	uint32_t                             address;                /* APIC physical address */
+@@ -1455,7 +1506,7 @@ struct madt_io_apic
+ #ifdef BX_QEMU
+ struct madt_int_override
+ {
+-	APIC_HEADER_DEF
++	ACPI_SUB_HEADER_DEF
+ 	uint8_t                bus;     /* Identifies ISA Bus */
+ 	uint8_t                source;  /* Bus-relative interrupt source */
+ 	uint32_t               gsi;     /* GSI that source will signal */
+@@ -1559,6 +1610,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
+     return ssdt_ptr - ssdt;
+ }
+ 
++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
++    uint64_t base, uint64_t len, int node, int enabled)
++{
++     numamem->type = SRAT_MEMORY;
++     numamem->length = sizeof(*numamem);
++     memset (numamem->proximity, 0 ,4);
++     numamem->proximity[0] = node;
++     numamem->flags = cpu_to_le32(!!enabled);
++     numamem->base_addr_low = base & 0xFFFFFFFF;
++     numamem->base_addr_high = base >> 32;
++     numamem->length_low = len & 0xFFFFFFFF;
++     numamem->length_high = len >> 32;
++     return;
++}
++
+ /* base_addr must be a multiple of 4KB */
+ void acpi_bios_init(void)
+ {
+@@ -1569,12 +1635,15 @@ void acpi_bios_init(void)
+     struct multiple_apic_table *madt;
+     uint8_t *dsdt, *ssdt;
+ #ifdef BX_QEMU
++    struct system_resource_affinity_table *srat;
+     struct acpi_20_hpet *hpet;
+     uint32_t hpet_addr;
+ #endif
+     uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
+     uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
++    uint32_t srat_addr,srat_size;
+     uint16_t i, external_tables;
++    int nb_numa_nodes;
+ 
+     /* reserve memory space for tables */
+ #ifdef BX_USE_EBDA_TABLES
+@@ -1616,6 +1685,25 @@ void acpi_bios_init(void)
+     ssdt_addr = addr;
+     ssdt = (void *)(addr);
+     addr += acpi_build_processor_ssdt(ssdt);
++#ifdef BX_QEMU
++    qemu_cfg_select(QEMU_CFG_NUMA);
++    nb_numa_nodes = qemu_cfg_get64();
++#else
++    nb_numa_nodes = 0;
++#endif
++    if (nb_numa_nodes > 0) {
++        addr = (addr + 7) & ~7;
++        srat_addr = addr;
++        srat_size = sizeof(*srat) +
++            sizeof(struct srat_processor_affinity) * smp_cpus +
++            sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
++        srat = (void *)(addr);
++        addr += srat_size;
++    } else {
++        srat_addr = addr;
++        srat = (void*)(addr);
++        srat_size = 0;
++    }
+ 
+     addr = (addr + 7) & ~7;
+     madt_addr = addr;
+@@ -1725,6 +1813,69 @@ void acpi_bios_init(void)
+ 
+     memset(rsdt, 0, rsdt_size);
+ #ifdef BX_QEMU
++    /* SRAT */
++    if (nb_numa_nodes > 0) {
++        struct srat_processor_affinity *core;
++        struct srat_memory_affinity *numamem;
++        int slots;
++        uint64_t mem_len, mem_base, next_base = 0, curnode;
++
++        qemu_cfg_select(QEMU_CFG_NUMA);
++        qemu_cfg_get64();
++        memset (srat, 0 , srat_size);
++        srat->reserved1=1;
++ 
++        core = (void*)(srat + 1);
++        for (i = 0; i < smp_cpus; ++i) {
++             core->type = SRAT_PROCESSOR;
++             core->length = sizeof(*core);
++             core->local_apic_id = i;
++             curnode = qemu_cfg_get64();
++             core->proximity_lo = curnode;
++             memset (core->proximity_hi, 0, 3);
++             core->local_sapic_eid = 0;
++             if (i < smp_cpus)
++                 core->flags = cpu_to_le32(1);
++             else
++                 core->flags = 0;
++             core++;
++        }
++
++        /* The memory map is a bit tricky: it contains at least one hole
++         * from 640k-1M and possibly another one from 3.5G-4G.
++         */
++        numamem = (void*)core; slots = 0;
++        acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
++        next_base = 1024 * 1024; numamem++;slots++;
++        for (i = 1; i < nb_numa_nodes + 1; ++i) {
++            mem_base = next_base;
++            mem_len = qemu_cfg_get64();
++            if (i == 1) mem_len -= 1024 * 1024;
++            next_base = mem_base + mem_len;
++ 
++            /* Cut out the PCI hole */
++            if (mem_base <= ram_size && next_base > ram_size) {
++                mem_len -= next_base - ram_size;
++                if (mem_len > 0) {
++                    acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++                    numamem++; slots++;
++                }
++                mem_base = 1ULL << 32;
++                mem_len = next_base - ram_size;
++                next_base += (1ULL << 32) - ram_size;
++            }
++            acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++            numamem++; slots++;
++        }
++        for (; slots < nb_numa_nodes + 2; slots++) {
++            acpi_build_srat_memory(numamem, 0, 0, 0, 0);
++            numamem++;
++        }
++
++         acpi_build_table_header((struct acpi_table_header *)srat,
++                                "SRAT", srat_size, 1);
++    }
++
+     /* HPET */
+     memset(hpet, 0, sizeof(*hpet));
+     /* Note timer_block_id value must be kept in sync with value advertised by
+@@ -1753,9 +1904,11 @@ void acpi_bios_init(void)
+     rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
+ #ifdef BX_QEMU
+     rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
++    if (nb_numa_nodes > 0)
++        rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
+ #endif
+-    acpi_build_table_header((struct acpi_table_header *)rsdt,
+-                            "RSDT", rsdt_size, 1);
++    acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
++        rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
+ 
+     acpi_tables_size = addr - base_addr;
+ 
+-- 
+1.6.1.3
+
diff --git a/pc-bios/bios-pq/series b/pc-bios/bios-pq/series
index 5a29df9..bd04a1a 100644
--- a/pc-bios/bios-pq/series
+++ b/pc-bios/bios-pq/series
@@ -9,3 +9,4 @@
 0009_qemu-bios-pci-hotplug-support.patch
 0010_bios-mark-the-acpi-sci-interrupt-as-connected-to-irq-9.patch
 0011_read-additional-acpi-tables-from-a-vm.patch
+0012_add-SRAT-ACPI-table-support.patch
-- 
1.6.1.3

  reply	other threads:[~2009-03-31 13:29 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-31 13:28 [Qemu-devel] [PATCH 0/4] add NUMA emulation Andre Przywara
2009-03-31 13:28 ` [Qemu-devel] [PATCH 1/4] added -numa cmdline parameter parser Andre Przywara
2009-03-31 13:28   ` [Qemu-devel] [PATCH 2/4] add info numa command to monitor Andre Przywara
2009-03-31 13:28     ` [Qemu-devel] [PATCH 3/4] sending NUMA topology to BIOS Andre Przywara
2009-03-31 13:28       ` Andre Przywara [this message]
2009-03-31 13:44         ` [Qemu-devel] Re: [PATCH 4/4] add BIOS support for an ACPI SRAT table (needed for NUMA support) Anthony Liguori
2009-03-31 20:04           ` [Qemu-devel] [PATCH 4/4] add SRAT ACPI table support Andre Przywara
2009-03-31 16:00       ` [Qemu-devel] [PATCH 3/4] sending NUMA topology to BIOS Blue Swirl
2009-03-31 21:33         ` Andre Przywara
2009-03-31 13:42   ` [Qemu-devel] Re: [PATCH 1/4] added -numa cmdline parameter parser Anthony Liguori
2009-03-31 20:34     ` Andre Przywara
2009-03-31 14:37 ` [Qemu-devel] Re: [PATCH 0/4] add NUMA emulation Anthony Liguori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1238506137-9140-5-git-send-email-andre.przywara@amd.com \
    --to=andre.przywara@amd.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.