* [PATCH v8 01/11] hw/i386: add 4g boundary start to X86MachineState
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-15 17:16 ` [PATCH v8 02/11] i386/pc: create pci-host qdev prior to pc_memory_init() Joao Martins
` (9 subsequent siblings)
10 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
Rather than hardcoding the 4G boundary everywhere, introduce an
X86MachineState field @above_4g_mem_start and use it
accordingly.
This is in preparation for relocating ram-above-4g to
dynamically start at 1T on AMD platforms.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
hw/i386/acpi-build.c | 2 +-
hw/i386/pc.c | 11 ++++++-----
hw/i386/sgx.c | 2 +-
hw/i386/x86.c | 1 +
include/hw/i386/x86.h | 3 +++
5 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index cad6f5ac41e9..0355bd3ddaad 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2024,7 +2024,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
build_srat_memory(table_data, mem_base, mem_len, i - 1,
MEM_AFFINITY_ENABLED);
}
- mem_base = 1ULL << 32;
+ mem_base = x86ms->above_4g_mem_start;
mem_len = next_base - x86ms->below_4g_mem_size;
next_base = mem_base + mem_len;
}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8d68295fdaff..1660684d12fd 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -850,9 +850,10 @@ void pc_memory_init(PCMachineState *pcms,
machine->ram,
x86ms->below_4g_mem_size,
x86ms->above_4g_mem_size);
- memory_region_add_subregion(system_memory, 0x100000000ULL,
+ memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start,
ram_above_4g);
- e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM);
+ e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
+ E820_RAM);
}
if (pcms->sgx_epc.size != 0) {
@@ -893,7 +894,7 @@ void pc_memory_init(PCMachineState *pcms,
machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
machine->device_memory->base =
- 0x100000000ULL + x86ms->above_4g_mem_size;
+ x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
machine->device_memory->base =
@@ -927,7 +928,7 @@ void pc_memory_init(PCMachineState *pcms,
} else if (pcms->sgx_epc.size != 0) {
cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
- cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
+ cxl_base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
@@ -1035,7 +1036,7 @@ uint64_t pc_pci_hole64_start(void)
} else if (pcms->sgx_epc.size != 0) {
hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
- hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size;
+ hole64_start = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
return ROUND_UP(hole64_start, 1 * GiB);
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index a44d66ba2afc..09d9c7c73d9f 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -295,7 +295,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
return;
}
- sgx_epc->base = 0x100000000ULL + x86ms->above_4g_mem_size;
+ sgx_epc->base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
memory_region_init(&sgx_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX);
memory_region_add_subregion(get_system_memory(), sgx_epc->base,
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6003b4b2dfea..029264c54fe2 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1373,6 +1373,7 @@ static void x86_machine_initfn(Object *obj)
x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
x86ms->bus_lock_ratelimit = 0;
+ x86ms->above_4g_mem_start = 4 * GiB;
}
static void x86_machine_class_init(ObjectClass *oc, void *data)
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 9089bdd99c3a..df82c5fd4252 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -56,6 +56,9 @@ struct X86MachineState {
/* RAM information (sizes, addresses, configuration): */
ram_addr_t below_4g_mem_size, above_4g_mem_size;
+ /* Start address of the initial RAM above 4G */
+ uint64_t above_4g_mem_start;
+
/* CPU and apic information: */
bool apic_xrupt_override;
unsigned pci_irq_mask;
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v8 02/11] i386/pc: create pci-host qdev prior to pc_memory_init()
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
2022-07-15 17:16 ` [PATCH v8 01/11] hw/i386: add 4g boundary start to X86MachineState Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-15 17:16 ` [PATCH v8 03/11] i386/pc: pass pci_hole64_size " Joao Martins
` (8 subsequent siblings)
10 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
At the start of pc_memory_init() we usually pass a range of
0..UINT64_MAX as pci_memory, when really it's 2G (i440fx) or
32G (q35). To get the real user value, we need to get the pci-host
passed property for the default pci_hole64_size. Thus, to get that,
create the qdev prior to memory init to make better estimations
of the max used/phys addr.
This is in preparation to determine that host-phys-bits are
enough and also for pci-hole64-size to be considered to relocate
ram-above-4g to be at 1T (on AMD platforms).
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
hw/i386/pc_piix.c | 7 +++++--
hw/i386/pc_q35.c | 6 +++---
hw/pci-host/i440fx.c | 5 ++---
include/hw/pci-host/i440fx.h | 3 ++-
4 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a234989ac363..6186a1473755 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
ram_addr_t lowmem;
+ DeviceState *i440fx_host;
/*
* Calculate ram split, for memory below and above 4G. It's a bit
@@ -164,9 +165,11 @@ static void pc_init1(MachineState *machine,
pci_memory = g_new(MemoryRegion, 1);
memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
rom_memory = pci_memory;
+ i440fx_host = qdev_new(host_type);
} else {
pci_memory = NULL;
rom_memory = system_memory;
+ i440fx_host = NULL;
}
pc_guest_info_init(pcms);
@@ -200,8 +203,8 @@ static void pc_init1(MachineState *machine,
const char *type = xen_enabled() ? TYPE_PIIX3_XEN_DEVICE
: TYPE_PIIX3_DEVICE;
- pci_bus = i440fx_init(host_type,
- pci_type,
+ pci_bus = i440fx_init(pci_type,
+ i440fx_host,
system_memory, system_io, machine->ram_size,
x86ms->below_4g_mem_size,
x86ms->above_4g_mem_size,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index f96cbd04e284..46ea89e564de 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -203,12 +203,12 @@ static void pc_q35_init(MachineState *machine)
pcms->smbios_entry_point_type);
}
- /* allocate ram and load rom/bios */
- pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory);
-
/* create pci host bus */
q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
+ /* allocate ram and load rom/bios */
+ pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory);
+
object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
OBJECT(ram_memory), NULL);
diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c
index 1c5ad5f918a2..d5426ef4a53c 100644
--- a/hw/pci-host/i440fx.c
+++ b/hw/pci-host/i440fx.c
@@ -237,7 +237,8 @@ static void i440fx_realize(PCIDevice *dev, Error **errp)
}
}
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
+PCIBus *i440fx_init(const char *pci_type,
+ DeviceState *dev,
MemoryRegion *address_space_mem,
MemoryRegion *address_space_io,
ram_addr_t ram_size,
@@ -246,7 +247,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type,
MemoryRegion *pci_address_space,
MemoryRegion *ram_memory)
{
- DeviceState *dev;
PCIBus *b;
PCIDevice *d;
PCIHostState *s;
@@ -254,7 +254,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type,
unsigned i;
I440FXState *i440fx;
- dev = qdev_new(host_type);
s = PCI_HOST_BRIDGE(dev);
b = pci_root_bus_new(dev, NULL, pci_address_space,
address_space_io, 0, TYPE_PCI_BUS);
diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h
index 52518dbf08e6..d02bf1ed6b93 100644
--- a/include/hw/pci-host/i440fx.h
+++ b/include/hw/pci-host/i440fx.h
@@ -35,7 +35,8 @@ struct PCII440FXState {
#define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX"
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
+PCIBus *i440fx_init(const char *pci_type,
+ DeviceState *dev,
MemoryRegion *address_space_mem,
MemoryRegion *address_space_io,
ram_addr_t ram_size,
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v8 03/11] i386/pc: pass pci_hole64_size to pc_memory_init()
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
2022-07-15 17:16 ` [PATCH v8 01/11] hw/i386: add 4g boundary start to X86MachineState Joao Martins
2022-07-15 17:16 ` [PATCH v8 02/11] i386/pc: create pci-host qdev prior to pc_memory_init() Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-15 17:16 ` [PATCH v8 04/11] i386/pc: factor out above-4g end to an helper Joao Martins
` (7 subsequent siblings)
10 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
Use the pre-initialized pci-host qdev and fetch the
pci-hole64-size into the newly added pc_memory_init() argument.
Use PCI_HOST_PROP_PCI_HOLE64_SIZE pci-host property for
fetching pci-hole64-size.
This is in preparation to determine that host-phys-bits are
enough and for pci-hole64-size to be considered to relocate
ram-above-4g to be at 1T (on AMD platforms).
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
hw/i386/pc.c | 3 ++-
hw/i386/pc_piix.c | 7 ++++++-
hw/i386/pc_q35.c | 10 +++++++++-
include/hw/i386/pc.h | 3 ++-
4 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1660684d12fd..e952dc62a12e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -817,7 +817,8 @@ void xen_load_linux(PCMachineState *pcms)
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
- MemoryRegion **ram_memory)
+ MemoryRegion **ram_memory,
+ uint64_t pci_hole64_size)
{
int linux_boot, i;
MemoryRegion *option_rom_mr;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6186a1473755..2a483e8666b4 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
ram_addr_t lowmem;
+ uint64_t hole64_size;
DeviceState *i440fx_host;
/*
@@ -166,10 +167,14 @@ static void pc_init1(MachineState *machine,
memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
rom_memory = pci_memory;
i440fx_host = qdev_new(host_type);
+ hole64_size = object_property_get_uint(OBJECT(i440fx_host),
+ PCI_HOST_PROP_PCI_HOLE64_SIZE,
+ &error_abort);
} else {
pci_memory = NULL;
rom_memory = system_memory;
i440fx_host = NULL;
+ hole64_size = 0;
}
pc_guest_info_init(pcms);
@@ -186,7 +191,7 @@ static void pc_init1(MachineState *machine,
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
pc_memory_init(pcms, system_memory,
- rom_memory, &ram_memory);
+ rom_memory, &ram_memory, hole64_size);
} else {
pc_system_flash_cleanup_unused(pcms);
if (machine->kernel_filename != NULL) {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 46ea89e564de..99ed75371c67 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -138,6 +138,7 @@ static void pc_q35_init(MachineState *machine)
MachineClass *mc = MACHINE_GET_CLASS(machine);
bool acpi_pcihp;
bool keep_pci_slot_hpc;
+ uint64_t pci_hole64_size = 0;
/* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
* and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
@@ -206,8 +207,15 @@ static void pc_q35_init(MachineState *machine)
/* create pci host bus */
q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
+ if (pcmc->pci_enabled) {
+ pci_hole64_size = object_property_get_uint(OBJECT(q35_host),
+ PCI_HOST_PROP_PCI_HOLE64_SIZE,
+ &error_abort);
+ }
+
/* allocate ram and load rom/bios */
- pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory);
+ pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory,
+ pci_hole64_size);
object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index b7735dccfc81..568c226d3034 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -159,7 +159,8 @@ void xen_load_linux(PCMachineState *pcms);
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
- MemoryRegion **ram_memory);
+ MemoryRegion **ram_memory,
+ uint64_t pci_hole64_size);
uint64_t pc_pci_hole64_start(void);
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
void pc_basic_device_init(struct PCMachineState *pcms,
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v8 04/11] i386/pc: factor out above-4g end to an helper
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (2 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 03/11] i386/pc: pass pci_hole64_size " Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-15 17:16 ` [PATCH v8 05/11] i386/pc: factor out cxl range end to helper Joao Martins
` (6 subsequent siblings)
10 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
There are a couple of places that seem to duplicate this calculation
of RAM size above the 4G boundary. Move all those to a helper function.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
hw/i386/pc.c | 27 ++++++++++++++-------------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e952dc62a12e..216e38da938e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -814,6 +814,17 @@ void xen_load_linux(PCMachineState *pcms)
#define PC_ROM_ALIGN 0x800
#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA)
+static hwaddr pc_above_4g_end(PCMachineState *pcms)
+{
+ X86MachineState *x86ms = X86_MACHINE(pcms);
+
+ if (pcms->sgx_epc.size != 0) {
+ return sgx_epc_above_4g_end(&pcms->sgx_epc);
+ }
+
+ return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+}
+
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
@@ -891,15 +902,8 @@ void pc_memory_init(PCMachineState *pcms,
exit(EXIT_FAILURE);
}
- if (pcms->sgx_epc.size != 0) {
- machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- machine->device_memory->base =
- x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
- }
-
machine->device_memory->base =
- ROUND_UP(machine->device_memory->base, 1 * GiB);
+ ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
if (pcmc->enforce_aligned_dimm) {
/* size device region assuming 1G page max alignment per slot */
@@ -929,7 +933,7 @@ void pc_memory_init(PCMachineState *pcms,
} else if (pcms->sgx_epc.size != 0) {
cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
- cxl_base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+ cxl_base = pc_above_4g_end(pcms);
}
e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
@@ -1016,7 +1020,6 @@ uint64_t pc_pci_hole64_start(void)
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
- X86MachineState *x86ms = X86_MACHINE(pcms);
uint64_t hole64_start = 0;
if (pcms->cxl_devices_state.host_mr.addr) {
@@ -1034,10 +1037,8 @@ uint64_t pc_pci_hole64_start(void)
if (!pcmc->broken_reserved_end) {
hole64_start += memory_region_size(&ms->device_memory->mr);
}
- } else if (pcms->sgx_epc.size != 0) {
- hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
- hole64_start = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+ hole64_start = pc_above_4g_end(pcms);
}
return ROUND_UP(hole64_start, 1 * GiB);
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v8 05/11] i386/pc: factor out cxl range end to helper
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (3 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 04/11] i386/pc: factor out above-4g end to an helper Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-18 12:53 ` Igor Mammedov
2022-07-15 17:16 ` [PATCH v8 06/11] i386/pc: factor out cxl range start " Joao Martins
` (5 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins, Jonathan Cameron
Move calculation of CXL memory region end to separate helper.
This is in preparation for a future change that removes the CXL range
dependency on the CXL memory region, with the goal of allowing
pc_pci_hole64_start() to be called before any memory regions are
initialized.
Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/i386/pc.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 216e38da938e..1f42f194d7b7 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -825,6 +825,25 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
+static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
+{
+ uint64_t start = 0;
+
+ if (pcms->cxl_devices_state.host_mr.addr) {
+ start = pcms->cxl_devices_state.host_mr.addr +
+ memory_region_size(&pcms->cxl_devices_state.host_mr);
+ if (pcms->cxl_devices_state.fixed_windows) {
+ GList *it;
+ for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+ start = fw->mr.addr + memory_region_size(&fw->mr);
+ }
+ }
+ }
+
+ return start;
+}
+
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
@@ -1022,16 +1041,8 @@ uint64_t pc_pci_hole64_start(void)
MachineState *ms = MACHINE(pcms);
uint64_t hole64_start = 0;
- if (pcms->cxl_devices_state.host_mr.addr) {
- hole64_start = pcms->cxl_devices_state.host_mr.addr +
- memory_region_size(&pcms->cxl_devices_state.host_mr);
- if (pcms->cxl_devices_state.fixed_windows) {
- GList *it;
- for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- hole64_start = fw->mr.addr + memory_region_size(&fw->mr);
- }
- }
+ if (pcms->cxl_devices_state.is_enabled) {
+ hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && ms->device_memory->base) {
hole64_start = ms->device_memory->base;
if (!pcmc->broken_reserved_end) {
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v8 05/11] i386/pc: factor out cxl range end to helper
2022-07-15 17:16 ` [PATCH v8 05/11] i386/pc: factor out cxl range end to helper Joao Martins
@ 2022-07-18 12:53 ` Igor Mammedov
0 siblings, 0 replies; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 12:53 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On Fri, 15 Jul 2022 18:16:22 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:
> Move calculation of CXL memory region end to separate helper.
>
> This is in preparation to a future change that removes CXL range
> dependency on the CXL memory region, with the goal of allowing
> pc_pci_hole64_start() to be called before any memory region are
> initialized.
>
> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
> ---
> hw/i386/pc.c | 31 +++++++++++++++++++++----------
> 1 file changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 216e38da938e..1f42f194d7b7 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -825,6 +825,25 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
> return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
> }
>
> +static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
> +{
> + uint64_t start = 0;
> +
> + if (pcms->cxl_devices_state.host_mr.addr) {
> + start = pcms->cxl_devices_state.host_mr.addr +
> + memory_region_size(&pcms->cxl_devices_state.host_mr);
> + if (pcms->cxl_devices_state.fixed_windows) {
> + GList *it;
> + for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
> + CXLFixedWindow *fw = it->data;
> + start = fw->mr.addr + memory_region_size(&fw->mr);
> + }
> + }
> + }
> +
> + return start;
> +}
> +
> void pc_memory_init(PCMachineState *pcms,
> MemoryRegion *system_memory,
> MemoryRegion *rom_memory,
> @@ -1022,16 +1041,8 @@ uint64_t pc_pci_hole64_start(void)
> MachineState *ms = MACHINE(pcms);
> uint64_t hole64_start = 0;
>
> - if (pcms->cxl_devices_state.host_mr.addr) {
> - hole64_start = pcms->cxl_devices_state.host_mr.addr +
> - memory_region_size(&pcms->cxl_devices_state.host_mr);
> - if (pcms->cxl_devices_state.fixed_windows) {
> - GList *it;
> - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
> - CXLFixedWindow *fw = it->data;
> - hole64_start = fw->mr.addr + memory_region_size(&fw->mr);
> - }
> - }
> + if (pcms->cxl_devices_state.is_enabled) {
> + hole64_start = pc_get_cxl_range_end(pcms);
> } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
> hole64_start = ms->device_memory->base;
> if (!pcmc->broken_reserved_end) {
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v8 06/11] i386/pc: factor out cxl range start to helper
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (4 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 05/11] i386/pc: factor out cxl range end to helper Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-18 12:52 ` Igor Mammedov
2022-07-15 17:16 ` [PATCH v8 07/11] i386/pc: handle unitialized mr in pc_get_cxl_range_end() Joao Martins
` (4 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins, Jonathan Cameron
Factor out the calculation of the base address of the memory region.
It will be used later on for the cxl range end counterpart calculation
as well as in the pc_memory_init() CXL memory region initialization, thus
avoiding duplication.
Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/i386/pc.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1f42f194d7b7..3fdcab4bb4f3 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -825,6 +825,22 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
+static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+{
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+ MachineState *machine = MACHINE(pcms);
+ hwaddr cxl_base;
+
+ if (pcmc->has_reserved_memory && machine->device_memory->base) {
+ cxl_base = machine->device_memory->base
+ + memory_region_size(&machine->device_memory->mr);
+ } else {
+ cxl_base = pc_above_4g_end(pcms);
+ }
+
+ return cxl_base;
+}
+
static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
{
uint64_t start = 0;
@@ -946,15 +962,7 @@ void pc_memory_init(PCMachineState *pcms,
MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
hwaddr cxl_size = MiB;
- if (pcmc->has_reserved_memory && machine->device_memory->base) {
- cxl_base = machine->device_memory->base
- + memory_region_size(&machine->device_memory->mr);
- } else if (pcms->sgx_epc.size != 0) {
- cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- cxl_base = pc_above_4g_end(pcms);
- }
-
+ cxl_base = pc_get_cxl_range_start(pcms);
e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v8 06/11] i386/pc: factor out cxl range start to helper
2022-07-15 17:16 ` [PATCH v8 06/11] i386/pc: factor out cxl range start " Joao Martins
@ 2022-07-18 12:52 ` Igor Mammedov
2022-07-18 13:51 ` Joao Martins
0 siblings, 1 reply; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 12:52 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On Fri, 15 Jul 2022 18:16:23 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:
> Factor out the calculation of the base address of the memory region.
> It will be used later on for the cxl range end counterpart calculation
> and as well in pc_memory_init() CXL memory region initialization, thus
> avoiding duplication.
>
> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
PS:
see note below in case series respin
> ---
> hw/i386/pc.c | 26 +++++++++++++++++---------
> 1 file changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 1f42f194d7b7..3fdcab4bb4f3 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -825,6 +825,22 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
> return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
> }
>
> +static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
> +{
> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> + MachineState *machine = MACHINE(pcms);
> + hwaddr cxl_base;
> +
> + if (pcmc->has_reserved_memory && machine->device_memory->base) {
> + cxl_base = machine->device_memory->base
> + + memory_region_size(&machine->device_memory->mr);
> + } else {
> + cxl_base = pc_above_4g_end(pcms);
> + }
> +
> + return cxl_base;
> +}
> +
> static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
> {
> uint64_t start = 0;
> @@ -946,15 +962,7 @@ void pc_memory_init(PCMachineState *pcms,
> MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
> hwaddr cxl_size = MiB;
>
> - if (pcmc->has_reserved_memory && machine->device_memory->base) {
> - cxl_base = machine->device_memory->base
> - + memory_region_size(&machine->device_memory->mr);
> - } else if (pcms->sgx_epc.size != 0) {
> - cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
> - } else {
shouldn't this hunk be a part of 4/11?
(otherwise it looks like it's been dropped by mistake)
end result is fine as pc_above_4g_end() already has this hunk (hence Ack)
> - cxl_base = pc_above_4g_end(pcms);
> - }
> -
> + cxl_base = pc_get_cxl_range_start(pcms);
> e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
> memory_region_add_subregion(system_memory, cxl_base, mr);
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v8 06/11] i386/pc: factor out cxl range start to helper
2022-07-18 12:52 ` Igor Mammedov
@ 2022-07-18 13:51 ` Joao Martins
0 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-18 13:51 UTC (permalink / raw)
To: Igor Mammedov
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On 7/18/22 13:52, Igor Mammedov wrote:
> On Fri, 15 Jul 2022 18:16:23 +0100
> Joao Martins <joao.m.martins@oracle.com> wrote:
>
>> Factor out the calculation of the base address of the memory region.
>> It will be used later on for the cxl range end counterpart calculation
>> and as well in pc_memory_init() CXL memory region initialization, thus
>> avoiding duplication.
>>
>> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>
> Acked-by: Igor Mammedov <imammedo@redhat.com>
>
Thanks!
> PS:
> see note below in case series respin
>
>> ---
>> hw/i386/pc.c | 26 +++++++++++++++++---------
>> 1 file changed, 17 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 1f42f194d7b7..3fdcab4bb4f3 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -825,6 +825,22 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
>> return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
>> }
>>
>> +static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
>> +{
>> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>> + MachineState *machine = MACHINE(pcms);
>> + hwaddr cxl_base;
>> +
>> + if (pcmc->has_reserved_memory && machine->device_memory->base) {
>> + cxl_base = machine->device_memory->base
>> + + memory_region_size(&machine->device_memory->mr);
>> + } else {
>> + cxl_base = pc_above_4g_end(pcms);
>> + }
>> +
>> + return cxl_base;
>> +}
>> +
>> static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
>> {
>> uint64_t start = 0;
>> @@ -946,15 +962,7 @@ void pc_memory_init(PCMachineState *pcms,
>> MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
>> hwaddr cxl_size = MiB;
>>
>> - if (pcmc->has_reserved_memory && machine->device_memory->base) {
>> - cxl_base = machine->device_memory->base
>> - + memory_region_size(&machine->device_memory->mr);
>
>> - } else if (pcms->sgx_epc.size != 0) {
>> - cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
>> - } else {
> shouldn't be this hunk be a part of 4/11?
> (otherwise it looks like it's been dropped by mistake)
It is a mistake :/ In v8 I must have forgotten to delete those 2 lines upon conflict
resolution.
> end result is fine as pc_above_4g_end() already has this hunk (hence Ack)
>
Let me fix that for the next respin.
>> - cxl_base = pc_above_4g_end(pcms);
>> - }
>> -
>> + cxl_base = pc_get_cxl_range_start(pcms);
>> e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
>> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
>> memory_region_add_subregion(system_memory, cxl_base, mr);
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v8 07/11] i386/pc: handle unitialized mr in pc_get_cxl_range_end()
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (5 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 06/11] i386/pc: factor out cxl range start " Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-18 12:58 ` Igor Mammedov
2022-07-15 17:16 ` [PATCH v8 08/11] i386/pc: factor out device_memory base/size to helper Joao Martins
` (3 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins, Jonathan Cameron
Remove the pc_get_cxl_range_end() dependency on the CXL memory region,
and replace it with a calculation that does not require the CXL host_mr
to determine the start of the CXL range.
This is in preparation to allow pc_pci_hole64_start() to be called early
in pc_memory_init(): handle the CXL memory region end when its underlying
memory region isn't yet initialized.
Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/i386/pc.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3fdcab4bb4f3..c654be6cf0bd 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -843,17 +843,15 @@ static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
{
- uint64_t start = 0;
+ uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
- if (pcms->cxl_devices_state.host_mr.addr) {
- start = pcms->cxl_devices_state.host_mr.addr +
- memory_region_size(&pcms->cxl_devices_state.host_mr);
- if (pcms->cxl_devices_state.fixed_windows) {
- GList *it;
- for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- start = fw->mr.addr + memory_region_size(&fw->mr);
- }
+ if (pcms->cxl_devices_state.fixed_windows) {
+ GList *it;
+
+ start = ROUND_UP(start, 256 * MiB);
+ for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+ start += fw->size;
}
}
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v8 07/11] i386/pc: handle unitialized mr in pc_get_cxl_range_end()
2022-07-15 17:16 ` [PATCH v8 07/11] i386/pc: handle unitialized mr in pc_get_cxl_range_end() Joao Martins
@ 2022-07-18 12:58 ` Igor Mammedov
2022-07-18 13:55 ` Joao Martins
0 siblings, 1 reply; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 12:58 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On Fri, 15 Jul 2022 18:16:24 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:
> Remove pc_get_cxl_range_end() dependency on the CXL memory region,
> and replace with one that does not require the CXL host_mr to determine
> the start of CXL start.
>
> This in preparation to allow pc_pci_hole64_start() to be called early
> in pc_memory_init(), handle CXL memory region end when its underlying
> memory region isn't yet initialized.
>
> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> hw/i386/pc.c | 18 ++++++++----------
> 1 file changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 3fdcab4bb4f3..c654be6cf0bd 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -843,17 +843,15 @@ static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
>
> static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
> {
> - uint64_t start = 0;
> + uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
^^^^^
why it's here?
>
> - if (pcms->cxl_devices_state.host_mr.addr) {
> - start = pcms->cxl_devices_state.host_mr.addr +
> - memory_region_size(&pcms->cxl_devices_state.host_mr);
> - if (pcms->cxl_devices_state.fixed_windows) {
> - GList *it;
> - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
> - CXLFixedWindow *fw = it->data;
> - start = fw->mr.addr + memory_region_size(&fw->mr);
> - }
> + if (pcms->cxl_devices_state.fixed_windows) {
> + GList *it;
> +
> + start = ROUND_UP(start, 256 * MiB);
and also this unexplained alignment.
> + for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
> + CXLFixedWindow *fw = it->data;
> + start += fw->size;
> }
> }
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v8 07/11] i386/pc: handle unitialized mr in pc_get_cxl_range_end()
2022-07-18 12:58 ` Igor Mammedov
@ 2022-07-18 13:55 ` Joao Martins
0 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-18 13:55 UTC (permalink / raw)
To: Igor Mammedov
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On 7/18/22 13:58, Igor Mammedov wrote:
> On Fri, 15 Jul 2022 18:16:24 +0100
> Joao Martins <joao.m.martins@oracle.com> wrote:
>
>> Remove pc_get_cxl_range_end() dependency on the CXL memory region,
>> and replace with one that does not require the CXL host_mr to determine
>> the start of CXL start.
>>
>> This in preparation to allow pc_pci_hole64_start() to be called early
>> in pc_memory_init(), handle CXL memory region end when its underlying
>> memory region isn't yet initialized.
>>
>> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> hw/i386/pc.c | 18 ++++++++----------
>> 1 file changed, 8 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 3fdcab4bb4f3..c654be6cf0bd 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -843,17 +843,15 @@ static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
>>
>> static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
>> {
>> - uint64_t start = 0;
>> + uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
> ^^^^^
> why it's here?
>
MiB is the size of CXL region
It's essentially logic inherited by pc_memory_init() that got replaced by
cxl_range_start():
@@ -946,15 +962,7 @@ void pc_memory_init(PCMachineState *pcms,
MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
hwaddr cxl_size = MiB;
^^^^^^^^^^^^^^^^^^^^^^
- if (pcmc->has_reserved_memory && machine->device_memory->base) {
- cxl_base = machine->device_memory->base
- + memory_region_size(&machine->device_memory->mr);
- } else if (pcms->sgx_epc.size != 0) {
- cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- cxl_base = pc_above_4g_end(pcms);
- }
-
+ cxl_base = pc_get_cxl_range_start(pcms);
>>
>> - if (pcms->cxl_devices_state.host_mr.addr) {
>> - start = pcms->cxl_devices_state.host_mr.addr +
>> - memory_region_size(&pcms->cxl_devices_state.host_mr);
>> - if (pcms->cxl_devices_state.fixed_windows) {
>> - GList *it;
>> - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
>> - CXLFixedWindow *fw = it->data;
>> - start = fw->mr.addr + memory_region_size(&fw->mr);
>> - }
>> + if (pcms->cxl_devices_state.fixed_windows) {
>> + GList *it;
>> +
>> + start = ROUND_UP(start, 256 * MiB);
>
> and also this unexplained alignment.
>
It's part of what the CXL fixed windows logic in pc_memory_init() does.
And the hunks I added is the same calculation.
Let me copy here below:
cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB);
for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
fw->base = cxl_fmw_base;
memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
"cxl-fixed-memory-region", fw->size);
memory_region_add_subregion(system_memory, fw->base, &fw->mr);
e820_add_entry(fw->base, fw->size, E820_RESERVED);
cxl_fmw_base += fw->size;
cxl_resv_end = cxl_fmw_base;
}
>> + for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
>> + CXLFixedWindow *fw = it->data;
>> + start += fw->size;
>> }
>> }
>>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v8 08/11] i386/pc: factor out device_memory base/size to helper
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (6 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 07/11] i386/pc: handle unitialized mr in pc_get_cxl_range_end() Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-18 13:03 ` Igor Mammedov
2022-07-15 17:16 ` [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA Joao Martins
` (2 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins, Jonathan Cameron
Move obtaining hole64_start from device_memory memory region base/size
into a helper alongside the corresponding getters in pc_memory_init() when
the hotplug range is uninitialized. While doing that, remove the memory
region based logic from this newly added helper.
This is the final step that allows pc_pci_hole64_start() to be callable
at the beginning of pc_memory_init() before any memory regions are
initialized.
Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/i386/pc.c | 47 ++++++++++++++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 15 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c654be6cf0bd..cda435e3baeb 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -825,15 +825,37 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
-static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+static void pc_get_device_memory_range(PCMachineState *pcms,
+ hwaddr *base,
+ ram_addr_t *device_mem_size)
{
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *machine = MACHINE(pcms);
+ ram_addr_t size;
+ hwaddr addr;
+
+ size = machine->maxram_size - machine->ram_size;
+ addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
+
+ if (pcmc->enforce_aligned_dimm) {
+ /* size device region assuming 1G page max alignment per slot */
+ size += (1 * GiB) * machine->ram_slots;
+ }
+
+ *base = addr;
+ *device_mem_size = size;
+}
+
+
+static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+{
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
hwaddr cxl_base;
+ ram_addr_t size;
- if (pcmc->has_reserved_memory && machine->device_memory->base) {
- cxl_base = machine->device_memory->base
- + memory_region_size(&machine->device_memory->mr);
+ if (pcmc->has_reserved_memory) {
+ pc_get_device_memory_range(pcms, &cxl_base, &size);
+ cxl_base += size;
} else {
cxl_base = pc_above_4g_end(pcms);
}
@@ -920,7 +942,7 @@ void pc_memory_init(PCMachineState *pcms,
/* initialize device memory address space */
if (pcmc->has_reserved_memory &&
(machine->ram_size < machine->maxram_size)) {
- ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
+ ram_addr_t device_mem_size;
if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
error_report("unsupported amount of memory slots: %"PRIu64,
@@ -935,13 +957,7 @@ void pc_memory_init(PCMachineState *pcms,
exit(EXIT_FAILURE);
}
- machine->device_memory->base =
- ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
-
- if (pcmc->enforce_aligned_dimm) {
- /* size device region assuming 1G page max alignment per slot */
- device_mem_size += (1 * GiB) * machine->ram_slots;
- }
+ pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size);
if ((machine->device_memory->base + device_mem_size) <
device_mem_size) {
@@ -1046,13 +1062,14 @@ uint64_t pc_pci_hole64_start(void)
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
uint64_t hole64_start = 0;
+ ram_addr_t size = 0;
if (pcms->cxl_devices_state.is_enabled) {
hole64_start = pc_get_cxl_range_end(pcms);
- } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
- hole64_start = ms->device_memory->base;
+ } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
+ pc_get_device_memory_range(pcms, &hole64_start, &size);
if (!pcmc->broken_reserved_end) {
- hole64_start += memory_region_size(&ms->device_memory->mr);
+ hole64_start += size;
}
} else {
hole64_start = pc_above_4g_end(pcms);
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v8 08/11] i386/pc: factor out device_memory base/size to helper
2022-07-15 17:16 ` [PATCH v8 08/11] i386/pc: factor out device_memory base/size to helper Joao Martins
@ 2022-07-18 13:03 ` Igor Mammedov
2022-07-18 14:22 ` Joao Martins
0 siblings, 1 reply; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 13:03 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On Fri, 15 Jul 2022 18:16:25 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:
> Move obtaining hole64_start from device_memory memory region base/size
> into an helper alongside correspondent getters in pc_memory_init() when
> the hotplug range is unitialized. While doing that remove the memory
> region based logic from this newly added helper.
>
> This is the final step that allows pc_pci_hole64_start() to be callable
> at the beginning of pc_memory_init() before any memory regions are
> initialized.
>
> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
> ---
> hw/i386/pc.c | 47 ++++++++++++++++++++++++++++++++---------------
> 1 file changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index c654be6cf0bd..cda435e3baeb 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -825,15 +825,37 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
> return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
> }
>
> -static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
> +static void pc_get_device_memory_range(PCMachineState *pcms,
> + hwaddr *base,
> + ram_addr_t *device_mem_size)
> {
> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> MachineState *machine = MACHINE(pcms);
> + ram_addr_t size;
> + hwaddr addr;
> +
> + size = machine->maxram_size - machine->ram_size;
> + addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
> +
> + if (pcmc->enforce_aligned_dimm) {
> + /* size device region assuming 1G page max alignment per slot */
> + size += (1 * GiB) * machine->ram_slots;
> + }
> +
> + *base = addr;
> + *device_mem_size = size;
> +}
> +
> +
stray newline
> +static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
> +{
> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> hwaddr cxl_base;
> + ram_addr_t size;
>
> - if (pcmc->has_reserved_memory && machine->device_memory->base) {
> - cxl_base = machine->device_memory->base
> - + memory_region_size(&machine->device_memory->mr);
> + if (pcmc->has_reserved_memory) {
> + pc_get_device_memory_range(pcms, &cxl_base, &size);
> + cxl_base += size;
> } else {
> cxl_base = pc_above_4g_end(pcms);
> }
> @@ -920,7 +942,7 @@ void pc_memory_init(PCMachineState *pcms,
> /* initialize device memory address space */
> if (pcmc->has_reserved_memory &&
> (machine->ram_size < machine->maxram_size)) {
> - ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
> + ram_addr_t device_mem_size;
>
> if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
> error_report("unsupported amount of memory slots: %"PRIu64,
> @@ -935,13 +957,7 @@ void pc_memory_init(PCMachineState *pcms,
> exit(EXIT_FAILURE);
> }
>
> - machine->device_memory->base =
> - ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
> -
> - if (pcmc->enforce_aligned_dimm) {
> - /* size device region assuming 1G page max alignment per slot */
> - device_mem_size += (1 * GiB) * machine->ram_slots;
> - }
> + pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size);
>
> if ((machine->device_memory->base + device_mem_size) <
> device_mem_size) {
> @@ -1046,13 +1062,14 @@ uint64_t pc_pci_hole64_start(void)
> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> MachineState *ms = MACHINE(pcms);
> uint64_t hole64_start = 0;
> + ram_addr_t size = 0;
>
> if (pcms->cxl_devices_state.is_enabled) {
> hole64_start = pc_get_cxl_range_end(pcms);
> - } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
> - hole64_start = ms->device_memory->base;
> + } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
> + pc_get_device_memory_range(pcms, &hole64_start, &size);
> if (!pcmc->broken_reserved_end) {
> - hole64_start += memory_region_size(&ms->device_memory->mr);
> + hole64_start += size;
> }
> } else {
> hole64_start = pc_above_4g_end(pcms);
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v8 08/11] i386/pc: factor out device_memory base/size to helper
2022-07-18 13:03 ` Igor Mammedov
@ 2022-07-18 14:22 ` Joao Martins
0 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-18 14:22 UTC (permalink / raw)
To: Igor Mammedov
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Jonathan Cameron
On 7/18/22 14:03, Igor Mammedov wrote:
> On Fri, 15 Jul 2022 18:16:25 +0100
> Joao Martins <joao.m.martins@oracle.com> wrote:
>
>> Move obtaining hole64_start from device_memory memory region base/size
>> into an helper alongside correspondent getters in pc_memory_init() when
>> the hotplug range is unitialized. While doing that remove the memory
>> region based logic from this newly added helper.
>>
>> This is the final step that allows pc_pci_hole64_start() to be callable
>> at the beginning of pc_memory_init() before any memory regions are
>> initialized.
>>
>> Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>
> Acked-by: Igor Mammedov <imammedo@redhat.com>
>
Thanks!
>> ---
>> hw/i386/pc.c | 47 ++++++++++++++++++++++++++++++++---------------
>> 1 file changed, 32 insertions(+), 15 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index c654be6cf0bd..cda435e3baeb 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -825,15 +825,37 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
>> return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
>> }
>>
>> -static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
>> +static void pc_get_device_memory_range(PCMachineState *pcms,
>> + hwaddr *base,
>> + ram_addr_t *device_mem_size)
>> {
>> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>> MachineState *machine = MACHINE(pcms);
>> + ram_addr_t size;
>> + hwaddr addr;
>> +
>> + size = machine->maxram_size - machine->ram_size;
>> + addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
>> +
>> + if (pcmc->enforce_aligned_dimm) {
>> + /* size device region assuming 1G page max alignment per slot */
>> + size += (1 * GiB) * machine->ram_slots;
>> + }
>> +
>> + *base = addr;
>> + *device_mem_size = size;
>> +}
>> +
>> +
>
> stray newline
>
I'll remove it in the v9 respin.
>> +static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
>> +{
>> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>> hwaddr cxl_base;
>> + ram_addr_t size;
>>
>> - if (pcmc->has_reserved_memory && machine->device_memory->base) {
>> - cxl_base = machine->device_memory->base
>> - + memory_region_size(&machine->device_memory->mr);
>> + if (pcmc->has_reserved_memory) {
>> + pc_get_device_memory_range(pcms, &cxl_base, &size);
>> + cxl_base += size;
>> } else {
>> cxl_base = pc_above_4g_end(pcms);
>> }
>> @@ -920,7 +942,7 @@ void pc_memory_init(PCMachineState *pcms,
>> /* initialize device memory address space */
>> if (pcmc->has_reserved_memory &&
>> (machine->ram_size < machine->maxram_size)) {
>> - ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
>> + ram_addr_t device_mem_size;
>>
>> if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
>> error_report("unsupported amount of memory slots: %"PRIu64,
>> @@ -935,13 +957,7 @@ void pc_memory_init(PCMachineState *pcms,
>> exit(EXIT_FAILURE);
>> }
>>
>> - machine->device_memory->base =
>> - ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
>> -
>> - if (pcmc->enforce_aligned_dimm) {
>> - /* size device region assuming 1G page max alignment per slot */
>> - device_mem_size += (1 * GiB) * machine->ram_slots;
>> - }
>> + pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size);
>>
>> if ((machine->device_memory->base + device_mem_size) <
>> device_mem_size) {
>> @@ -1046,13 +1062,14 @@ uint64_t pc_pci_hole64_start(void)
>> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>> MachineState *ms = MACHINE(pcms);
>> uint64_t hole64_start = 0;
>> + ram_addr_t size = 0;
>>
>> if (pcms->cxl_devices_state.is_enabled) {
>> hole64_start = pc_get_cxl_range_end(pcms);
>> - } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
>> - hole64_start = ms->device_memory->base;
>> + } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
>> + pc_get_device_memory_range(pcms, &hole64_start, &size);
>> if (!pcmc->broken_reserved_end) {
>> - hole64_start += memory_region_size(&ms->device_memory->mr);
>> + hole64_start += size;
>> }
>> } else {
>> hole64_start = pc_above_4g_end(pcms);
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (7 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 08/11] i386/pc: factor out device_memory base/size to helper Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-18 13:16 ` Igor Mammedov
2022-07-15 17:16 ` [PATCH v8 10/11] i386/pc: relocate 4g start to 1T where applicable Joao Martins
2022-07-15 17:16 ` [PATCH v8 11/11] i386/pc: restrict AMD only enforcing of 1Tb hole to new machine type Joao Martins
10 siblings, 1 reply; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
Calculate max *used* GPA against the CPU maximum possible address
and error out if the former surpasses the latter. This ensures
max used GPA is reachable by configured phys-bits. Default phys-bits
on QEMU is TCG_PHYS_ADDR_BITS (40) which is enough for the CPU to
address 1Tb (0xff ffff ffff) or 1010G (0xfc ffff ffff) in AMD hosts
with IOMMU.
This is preparation for AMD guests with >1010G, where it will want to relocate
ram-above-4g to be after 1Tb instead of 4G.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/i386/pc.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index cda435e3baeb..f30661b7f1a2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -880,6 +880,18 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
return start;
}
+static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
+{
+ X86CPU *cpu = X86_CPU(first_cpu);
+
+ /* 32-bit systems don't have hole64 thus return max CPU address */
+ if (cpu->phys_bits <= 32) {
+ return ((hwaddr)1 << cpu->phys_bits) - 1;
+ }
+
+ return pc_pci_hole64_start() + pci_hole64_size - 1;
+}
+
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
@@ -894,13 +906,28 @@ void pc_memory_init(PCMachineState *pcms,
MachineClass *mc = MACHINE_GET_CLASS(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
+ hwaddr maxphysaddr, maxusedaddr;
hwaddr cxl_base, cxl_resv_end = 0;
+ X86CPU *cpu = X86_CPU(first_cpu);
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
linux_boot = (machine->kernel_filename != NULL);
+ /*
+ * phys-bits is required to be appropriately configured
+ * to make sure max used GPA is reachable.
+ */
+ maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
+ maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
+ if (maxphysaddr < maxusedaddr) {
+ error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
+ " phys-bits too low (%u)",
+ maxphysaddr, maxusedaddr, cpu->phys_bits);
+ exit(EXIT_FAILURE);
+ }
+
/*
* Split single memory region and use aliases to address portions of it,
* done for backwards compatibility with older qemus.
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA
2022-07-15 17:16 ` [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA Joao Martins
@ 2022-07-18 13:16 ` Igor Mammedov
2022-07-18 13:56 ` Igor Mammedov
0 siblings, 1 reply; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 13:16 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit
On Fri, 15 Jul 2022 18:16:26 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:
> Calculate max *used* GPA against the CPU maximum possible address
> and error out if the former surprasses the latter. This ensures
> max used GPA is reacheable by configured phys-bits. Default phys-bits
> on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough for the CPU to
> address 1Tb (0xff ffff ffff) or 1010G (0xfc ffff ffff) in AMD hosts
> with IOMMU.
>
> This is preparation for AMD guests with >1010G, where it will want relocate
> ram-above-4g to be after 1Tb instead of 4G.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
> ---
> hw/i386/pc.c | 27 +++++++++++++++++++++++++++
> 1 file changed, 27 insertions(+)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index cda435e3baeb..f30661b7f1a2 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -880,6 +880,18 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
> return start;
> }
>
> +static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
> +{
> + X86CPU *cpu = X86_CPU(first_cpu);
> +
> + /* 32-bit systems don't have hole64 thus return max CPU address */
> + if (cpu->phys_bits <= 32) {
> + return ((hwaddr)1 << cpu->phys_bits) - 1;
> + }
> +
> + return pc_pci_hole64_start() + pci_hole64_size - 1;
> +}
> +
> void pc_memory_init(PCMachineState *pcms,
> MemoryRegion *system_memory,
> MemoryRegion *rom_memory,
> @@ -894,13 +906,28 @@ void pc_memory_init(PCMachineState *pcms,
> MachineClass *mc = MACHINE_GET_CLASS(machine);
> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> X86MachineState *x86ms = X86_MACHINE(pcms);
> + hwaddr maxphysaddr, maxusedaddr;
> hwaddr cxl_base, cxl_resv_end = 0;
> + X86CPU *cpu = X86_CPU(first_cpu);
>
> assert(machine->ram_size == x86ms->below_4g_mem_size +
> x86ms->above_4g_mem_size);
>
> linux_boot = (machine->kernel_filename != NULL);
>
> + /*
> + * phys-bits is required to be appropriately configured
> + * to make sure max used GPA is reachable.
> + */
> + maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
> + maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
> + if (maxphysaddr < maxusedaddr) {
> + error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
> + " phys-bits too low (%u)",
> + maxphysaddr, maxusedaddr, cpu->phys_bits);
> + exit(EXIT_FAILURE);
> + }
> +
> /*
> * Split single memory region and use aliases to address portions of it,
> * done for backwards compatibility with older qemus.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA
2022-07-18 13:16 ` Igor Mammedov
@ 2022-07-18 13:56 ` Igor Mammedov
2022-07-18 14:21 ` Joao Martins
0 siblings, 1 reply; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 13:56 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit
On Mon, 18 Jul 2022 15:16:22 +0200
Igor Mammedov <imammedo@redhat.com> wrote:
> On Fri, 15 Jul 2022 18:16:26 +0100
> Joao Martins <joao.m.martins@oracle.com> wrote:
>
> > Calculate max *used* GPA against the CPU maximum possible address
> > and error out if the former surprasses the latter. This ensures
> > max used GPA is reacheable by configured phys-bits. Default phys-bits
> > on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough for the CPU to
> > address 1Tb (0xff ffff ffff) or 1010G (0xfc ffff ffff) in AMD hosts
> > with IOMMU.
> >
> > This is preparation for AMD guests with >1010G, where it will want relocate
> > ram-above-4g to be after 1Tb instead of 4G.
> >
> > Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>
> Acked-by: Igor Mammedov <imammedo@redhat.com>
[...]
> > + return pc_pci_hole64_start() + pci_hole64_size - 1;
off by 1?
> > +}
> > +
[...]
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA
2022-07-18 13:56 ` Igor Mammedov
@ 2022-07-18 14:21 ` Joao Martins
0 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-18 14:21 UTC (permalink / raw)
To: Igor Mammedov
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit
On 7/18/22 14:56, Igor Mammedov wrote:
> On Mon, 18 Jul 2022 15:16:22 +0200
> Igor Mammedov <imammedo@redhat.com> wrote:
>
>> On Fri, 15 Jul 2022 18:16:26 +0100
>> Joao Martins <joao.m.martins@oracle.com> wrote:
>>
>>> Calculate max *used* GPA against the CPU maximum possible address
>>> and error out if the former surprasses the latter. This ensures
>>> max used GPA is reacheable by configured phys-bits. Default phys-bits
>>> on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough for the CPU to
>>> address 1Tb (0xff ffff ffff) or 1010G (0xfc ffff ffff) in AMD hosts
>>> with IOMMU.
>>>
>>> This is preparation for AMD guests with >1010G, where it will want relocate
>>> ram-above-4g to be after 1Tb instead of 4G.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>
>> Acked-by: Igor Mammedov <imammedo@redhat.com>
> [...]
>
>>> + return pc_pci_hole64_start() + pci_hole64_size - 1;
>
> off by 1?
>
If you add a size to a start of range, you get past the end, not
the actual end address. And given that we are supposed to return the
end address ... or am I seeing a non issue here?
[Also this was new in v8 predecessor patches didn't have it.]
>>> +}
>>> +
> [...]
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v8 10/11] i386/pc: relocate 4g start to 1T where applicable
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (8 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
2022-07-18 13:18 ` Igor Mammedov
2022-07-15 17:16 ` [PATCH v8 11/11] i386/pc: restrict AMD only enforcing of 1Tb hole to new machine type Joao Martins
10 siblings, 1 reply; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
It is assumed that the whole GPA space is available to be DMA
addressable, within a given address space limit, except for a
tiny region before the 4G. Since Linux v5.4, VFIO validates
whether the selected GPA is indeed valid i.e. not reserved by
IOMMU on behalf of some specific devices or platform-defined
restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
-EINVAL.
AMD systems with an IOMMU are examples of such platforms and
particularly may only have these ranges as allowed:
0000000000000000 - 00000000fedfffff (0 .. 3.982G)
00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G)
0000010000000000 - ffffffffffffffff (1Tb .. 16Eb[*])
We already account for the 4G hole, but if the guest is big
enough we will fail to allocate a guest with >1010G due to the
~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).
[*] there is another reserved region unrelated to HT that exists
in the 256T boundary in Fam 17h according to Errata #1286,
documented also in "Open-Source Register Reference for AMD Family
17h Processors (PUB)"
When creating the region above 4G, take into account that on AMD
platforms the HyperTransport range is reserved and hence it
cannot be used either as GPAs. On those cases rather than
establishing the start of ram-above-4g to be 4G, relocate instead
to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
Topology", for more information on the underlying restriction of
IOVAs.
After accounting for the 1Tb hole on AMD hosts, mtree should
look like:
0000000000000000-000000007fffffff (prio 0, i/o):
alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff
0000010000000000-000001ff7fffffff (prio 0, i/o):
alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff
If the relocation is done or the address space covers it, we
also add the HT range to e820 as reserved.
Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
to address 1Tb (0xff ffff ffff). On AMD platforms, if a
ram-above-4g relocation is attempted and the CPU wasn't configured
with a big enough phys-bits, an error message will be printed
due to the maxphysaddr vs maxusedaddr check previously added.
Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/i386/pc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f30661b7f1a2..a71135930833 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -892,6 +892,40 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
return pc_pci_hole64_start() + pci_hole64_size - 1;
}
+/*
+ * AMD systems with an IOMMU have an additional hole close to the
+ * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
+ * on kernel version, VFIO may or may not let you DMA map those ranges.
+ * Starting Linux v5.4 we validate it, and can't create guests on AMD machines
+ * with certain memory sizes. It's also wrong to use those IOVA ranges
+ * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
+ * The ranges reserved for Hyper-Transport are:
+ *
+ * FD_0000_0000h - FF_FFFF_FFFFh
+ *
+ * The ranges represent the following:
+ *
+ * Base Address Top Address Use
+ *
+ * FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space
+ * FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl
+ * FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK
+ * FD_F910_0000h FD_F91F_FFFFh System Management
+ * FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables
+ * FD_FB00_0000h FD_FBFF_FFFFh Address Translation
+ * FD_FC00_0000h FD_FDFF_FFFFh I/O Space
+ * FD_FE00_0000h FD_FFFF_FFFFh Configuration
+ * FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages
+ * FE_2000_0000h FF_FFFF_FFFFh Reserved
+ *
+ * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
+ * Table 3: Special Address Controls (GPA) for more information.
+ */
+#define AMD_HT_START 0xfd00000000UL
+#define AMD_HT_END 0xffffffffffUL
+#define AMD_ABOVE_1TB_START (AMD_HT_END + 1)
+#define AMD_HT_SIZE (AMD_ABOVE_1TB_START - AMD_HT_START)
+
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
@@ -915,6 +949,26 @@ void pc_memory_init(PCMachineState *pcms,
linux_boot = (machine->kernel_filename != NULL);
+ /*
+ * The HyperTransport range close to the 1T boundary is unique to AMD
+ * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
+ * to above 1T to AMD vCPUs only.
+ */
+ if (IS_AMD_CPU(&cpu->env)) {
+ /* Bail out if max possible address does not cross HT range */
+ if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
+ x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
+ }
+
+ /*
+ * Advertise the HT region if address space covers the reserved
+ * region or if we relocate.
+ */
+ if (cpu->phys_bits >= 40) {
+ e820_add_entry(AMD_HT_START, AMD_HT_SIZE, E820_RESERVED);
+ }
+ }
+
/*
* phys-bits is required to be appropriately configured
* to make sure max used GPA is reachable.
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v8 10/11] i386/pc: relocate 4g start to 1T where applicable
2022-07-15 17:16 ` [PATCH v8 10/11] i386/pc: relocate 4g start to 1T where applicable Joao Martins
@ 2022-07-18 13:18 ` Igor Mammedov
0 siblings, 0 replies; 23+ messages in thread
From: Igor Mammedov @ 2022-07-18 13:18 UTC (permalink / raw)
To: Joao Martins
Cc: qemu-devel, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit
On Fri, 15 Jul 2022 18:16:27 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:
> It is assumed that the whole GPA space is available to be DMA
> addressable, within a given address space limit, except for a
> tiny region before the 4G. Since Linux v5.4, VFIO validates
> whether the selected GPA is indeed valid i.e. not reserved by
> IOMMU on behalf of some specific devices or platform-defined
> restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
> -EINVAL.
>
> AMD systems with an IOMMU are examples of such platforms and
> particularly may only have these ranges as allowed:
>
> 0000000000000000 - 00000000fedfffff (0 .. 3.982G)
> 00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G)
> 0000010000000000 - ffffffffffffffff (1Tb .. 16Eb[*])
>
> We already account for the 4G hole, albeit if the guest is big
> enough we will fail to allocate a guest with >1010G due to the
> ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).
>
> [*] there is another reserved region unrelated to HT that exists
> in the 256T boundary in Fam 17h according to Errata #1286,
> documented also in "Open-Source Register Reference for AMD Family
> 17h Processors (PUB)"
>
> When creating the region above 4G, take into account that on AMD
> platforms the HyperTransport range is reserved and hence it
> cannot be used either as GPAs. On those cases rather than
> establishing the start of ram-above-4g to be 4G, relocate instead
> to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
> Topology", for more information on the underlying restriction of
> IOVAs.
>
> After accounting for the 1Tb hole on AMD hosts, mtree should
> look like:
>
> 0000000000000000-000000007fffffff (prio 0, i/o):
> alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff
> 0000010000000000-000001ff7fffffff (prio 0, i/o):
> alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff
>
> If the relocation is done or the address space covers it, we
> also add the HT range to e820 as reserved.
>
> Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
> to address 1Tb (0xff ffff ffff). On AMD platforms, if a
> ram-above-4g relocation is attempted and the CPU wasn't configured
> with a big enough phys-bits, an error message will be printed
> due to the maxphysaddr vs maxusedaddr check previously added.
>
> Suggested-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
> ---
> hw/i386/pc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 54 insertions(+)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index f30661b7f1a2..a71135930833 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -892,6 +892,40 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
> return pc_pci_hole64_start() + pci_hole64_size - 1;
> }
>
> +/*
> + * AMD systems with an IOMMU have an additional hole close to the
> + * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
> + * on kernel version, VFIO may or may not let you DMA map those ranges.
> + * Starting Linux v5.4 we validate it, and can't create guests on AMD machines
> + * with certain memory sizes. It's also wrong to use those IOVA ranges
> + * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
> + * The ranges reserved for Hyper-Transport are:
> + *
> + * FD_0000_0000h - FF_FFFF_FFFFh
> + *
> + * The ranges represent the following:
> + *
> + * Base Address Top Address Use
> + *
> + * FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space
> + * FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl
> + * FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK
> + * FD_F910_0000h FD_F91F_FFFFh System Management
> + * FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables
> + * FD_FB00_0000h FD_FBFF_FFFFh Address Translation
> + * FD_FC00_0000h FD_FDFF_FFFFh I/O Space
> + * FD_FE00_0000h FD_FFFF_FFFFh Configuration
> + * FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages
> + * FE_2000_0000h FF_FFFF_FFFFh Reserved
> + *
> + * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
> + * Table 3: Special Address Controls (GPA) for more information.
> + */
> +#define AMD_HT_START 0xfd00000000UL
> +#define AMD_HT_END 0xffffffffffUL
> +#define AMD_ABOVE_1TB_START (AMD_HT_END + 1)
> +#define AMD_HT_SIZE (AMD_ABOVE_1TB_START - AMD_HT_START)
> +
> void pc_memory_init(PCMachineState *pcms,
> MemoryRegion *system_memory,
> MemoryRegion *rom_memory,
> @@ -915,6 +949,26 @@ void pc_memory_init(PCMachineState *pcms,
>
> linux_boot = (machine->kernel_filename != NULL);
>
> + /*
> + * The HyperTransport range close to the 1T boundary is unique to AMD
> + * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
> + * to above 1T to AMD vCPUs only.
> + */
> + if (IS_AMD_CPU(&cpu->env)) {
> + /* Bail out if max possible address does not cross HT range */
> + if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
> + x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
> + }
> +
> + /*
> + * Advertise the HT region if address space covers the reserved
> + * region or if we relocate.
> + */
> + if (cpu->phys_bits >= 40) {
> + e820_add_entry(AMD_HT_START, AMD_HT_SIZE, E820_RESERVED);
> + }
> + }
> +
> /*
> * phys-bits is required to be appropriately configured
> * to make sure max used GPA is reachable.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v8 11/11] i386/pc: restrict AMD only enforcing of 1Tb hole to new machine type
2022-07-15 17:16 [PATCH v8 00/11] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU Joao Martins
` (9 preceding siblings ...)
2022-07-15 17:16 ` [PATCH v8 10/11] i386/pc: relocate 4g start to 1T where applicable Joao Martins
@ 2022-07-15 17:16 ` Joao Martins
10 siblings, 0 replies; 23+ messages in thread
From: Joao Martins @ 2022-07-15 17:16 UTC (permalink / raw)
To: qemu-devel
Cc: Igor Mammedov, Eduardo Habkost, Michael S. Tsirkin,
Richard Henderson, Alex Williamson, Paolo Bonzini, Ani Sinha,
Marcel Apfelbaum, Dr. David Alan Gilbert, Suravee Suthikulpanit,
Joao Martins
The added enforcing is only relevant in the case of AMD where the
range right before the 1TB is restricted and cannot be DMA mapped
by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
or possibly other kinds of IOMMU events in the AMD IOMMU.
Although, there's a case where it may make sense to disable the
IOVA relocation/validation when migrating from a
non-amd-1tb-aware qemu to one that supports it.
Relocating RAM regions to after the 1Tb hole has consequences for
guest ABI because we are changing the memory mapping, so make
sure that only new machine types enforce it, but not older ones.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
---
hw/i386/pc.c | 6 ++++--
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
include/hw/i386/pc.h | 1 +
4 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index a71135930833..c8d878cb8059 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -952,9 +952,10 @@ void pc_memory_init(PCMachineState *pcms,
/*
* The HyperTransport range close to the 1T boundary is unique to AMD
* hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
- * to above 1T to AMD vCPUs only.
+ * to above 1T to AMD vCPUs only. @enforce_amd_1tb_hole is only false in
+ * older machine types (<= 7.0) for compatibility purposes.
*/
- if (IS_AMD_CPU(&cpu->env)) {
+ if (IS_AMD_CPU(&cpu->env) && pcmc->enforce_amd_1tb_hole) {
/* Bail out if max possible address does not cross HT range */
if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
@@ -1903,6 +1904,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->has_reserved_memory = true;
pcmc->kvmclock_enabled = true;
pcmc->enforce_aligned_dimm = true;
+ pcmc->enforce_amd_1tb_hole = true;
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
* to be used at the moment, 32K should be enough for a while. */
pcmc->acpi_data_size = 0x20000 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 2a483e8666b4..074571bc03a8 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -446,9 +446,11 @@ DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
static void pc_i440fx_7_0_machine_options(MachineClass *m)
{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_i440fx_7_1_machine_options(m);
m->alias = NULL;
m->is_default = false;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 99ed75371c67..f3aa4694a299 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -383,8 +383,10 @@ DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
static void pc_q35_7_0_machine_options(MachineClass *m)
{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_7_1_machine_options(m);
m->alias = NULL;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 568c226d3034..9cc3f5d33805 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -118,6 +118,7 @@ struct PCMachineClass {
bool has_reserved_memory;
bool enforce_aligned_dimm;
bool broken_reserved_end;
+ bool enforce_amd_1tb_hole;
/* generate legacy CPU hotplug AML */
bool legacy_cpu_hotplug;
--
2.17.2
^ permalink raw reply related [flat|nested] 23+ messages in thread