* [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole @ 2017-10-18 9:58 Marcel Apfelbaum 2017-10-18 10:45 ` Igor Mammedov 2017-10-18 11:40 ` Laszlo Ersek 0 siblings, 2 replies; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-18 9:58 UTC (permalink / raw) To: qemu-devel; +Cc: marcel, mst, pbonzini, ehabkost, lersek Currently there is no MMIO range over 4G reserved for PCI hotplug. Since the 32bit PCI hole depends on the number of cold-plugged PCI devices and other factors, it is very possible is too small to hotplug PCI devices with large BARs. Fix it by reserving all the address space after RAM (and the reserved space for RAM hotplug), but no more than 40bits. The later seems to be QEMU's magic number for the Guest physical CPU addressable bits and is safe with respect to migration. Note this is a regression since prev QEMU versions had some range reserved for 64bit PCI hotplug. Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> --- hw/i386/pc.c | 22 ++++++++++++++++++++++ hw/pci-host/piix.c | 10 ++++++++++ hw/pci-host/q35.c | 9 +++++++++ include/hw/i386/pc.h | 10 ++++++++++ include/hw/pci-host/q35.h | 1 + 5 files changed, 52 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 05985d4927..4df20d2b99 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1448,6 +1448,28 @@ void pc_memory_init(PCMachineState *pcms, pcms->ioapic_as = &address_space_memory; } +uint64_t pc_pci_hole64_start(void) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + uint64_t hole64_start = 0; + + if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { + hole64_start = pcms->hotplug_memory.base; + if (!pcmc->broken_reserved_end) { + hole64_start += memory_region_size(&pcms->hotplug_memory.mr); + } + } else { + hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; + } + + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { + hole64_start = PCI_HOST_PCI_HOLE64_END; + } + + return hole64_start; +} + qemu_irq pc_allocate_cpu_irq(void) { return qemu_allocate_irq(pic_irq_request, NULL, 0); diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index dec345fd24..317a232972 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -50,6 +50,7 @@ typedef struct I440FXState { PCIHostState parent_obj; Range pci_hole; uint64_t pci_hole64_size; + bool pci_hole64_fix; uint32_t short_root_bus; } I440FXState; @@ -243,11 +244,15 @@ static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v, void *opaque, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); Range w64; uint64_t value; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_lob(&w64); + if (!value && s->pci_hole64_fix) { + value = pc_pci_hole64_start(); + } visit_type_uint64(v, name, &value, errp); } @@ -256,11 +261,15 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); Range w64; uint64_t value; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { + value = PCI_HOST_PCI_HOLE64_END; + } visit_type_uint64(v, name, &value, errp); } @@ -857,6 +866,7 @@ static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), + DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 1ff648e80c..641213f177 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -104,11 +104,15 @@ static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + Q35PCIHost *s = Q35_HOST_DEVICE(obj); Range w64; uint64_t value; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_lob(&w64); + if (!value && s->pci_hole64_fix) { + value = pc_pci_hole64_start(); + } visit_type_uint64(v, name, &value, errp); } @@ -117,11 +121,15 @@ static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + Q35PCIHost *s = Q35_HOST_DEVICE(obj); Range w64; uint64_t value; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { + value = PCI_HOST_PCI_HOLE64_END; + } visit_type_uint64(v, name, &value, errp); } @@ -143,6 +151,7 @@ static Property q35_host_props[] = { mch.below_4g_mem_size, 0), DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, mch.above_4g_mem_size, 0), + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 087d184ef5..2e98e8c943 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -239,6 +239,7 @@ void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" #define DEFAULT_PCI_HOLE64_SIZE (~0x0ULL) +#define PCI_HOST_PCI_HOLE64_END (0x1ULL << 40) void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, @@ -249,6 +250,7 @@ void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, MemoryRegion **ram_memory); +uint64_t pc_pci_hole64_start(void); qemu_irq pc_allocate_cpu_irq(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, @@ -375,6 +377,14 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = TYPE_X86_CPU,\ .property = "x-hv-max-vps",\ .value = "0x40",\ + },{\ + .driver = "i440FX-pcihost",\ + .property = "x-pci-hole64-fix",\ + .value = "false",\ + },{\ + .driver = "q35-pcihost",\ + .property = "x-pci-hole64-fix",\ + .value = "false",\ }, #define PC_COMPAT_2_9 \ diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 58983c00b3..8f4ddde393 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -68,6 +68,7 @@ typedef struct Q35PCIHost { PCIExpressHost parent_obj; /*< public >*/ + bool pci_hole64_fix; MCHPCIState mch; } Q35PCIHost; -- 2.13.5 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-18 9:58 [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole Marcel Apfelbaum @ 2017-10-18 10:45 ` Igor Mammedov 2017-10-18 10:57 ` Marcel Apfelbaum 2017-10-18 11:40 ` Laszlo Ersek 1 sibling, 1 reply; 19+ messages in thread From: Igor Mammedov @ 2017-10-18 10:45 UTC (permalink / raw) To: Marcel Apfelbaum; +Cc: qemu-devel, pbonzini, lersek, ehabkost, mst On Wed, 18 Oct 2017 12:58:07 +0300 Marcel Apfelbaum <marcel@redhat.com> wrote: > Currently there is no MMIO range over 4G > reserved for PCI hotplug. Since the 32bit PCI hole > depends on the number of cold-plugged PCI devices > and other factors, it is very possible is too small > to hotplug PCI devices with large BARs. > > Fix it by reserving all the address space after > RAM (and the reserved space for RAM hotplug), > but no more than 40bits. The later seems to be > QEMU's magic number for the Guest physical CPU addressable > bits and is safe with respect to migration. > > Note this is a regression since prev QEMU versions had > some range reserved for 64bit PCI hotplug. > > Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> > --- > hw/i386/pc.c | 22 ++++++++++++++++++++++ > hw/pci-host/piix.c | 10 ++++++++++ > hw/pci-host/q35.c | 9 +++++++++ > include/hw/i386/pc.h | 10 ++++++++++ > include/hw/pci-host/q35.h | 1 + > 5 files changed, 52 insertions(+) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 05985d4927..4df20d2b99 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -1448,6 +1448,28 @@ void pc_memory_init(PCMachineState *pcms, > pcms->ioapic_as = &address_space_memory; > } > > +uint64_t pc_pci_hole64_start(void) > +{ > + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); > + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + uint64_t hole64_start = 0; > + > + if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { > + hole64_start = pcms->hotplug_memory.base; > + if (!pcmc->broken_reserved_end) { > + hole64_start += memory_region_size(&pcms->hotplug_memory.mr); > + } > + } else { > + hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; > + } > + > + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { > + hole64_start = PCI_HOST_PCI_HOLE64_END; > + } > + > + return hole64_start; maybe align up on 1Gb > +} > + > qemu_irq pc_allocate_cpu_irq(void) > { > return qemu_allocate_irq(pic_irq_request, NULL, 0); > diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c > index dec345fd24..317a232972 100644 > --- a/hw/pci-host/piix.c > +++ b/hw/pci-host/piix.c > @@ -50,6 +50,7 @@ typedef struct I440FXState { > PCIHostState parent_obj; > Range pci_hole; > uint64_t pci_hole64_size; > + bool pci_hole64_fix; > uint32_t short_root_bus; > } I440FXState; > > @@ -243,11 +244,15 @@ static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v, > void *opaque, Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_lob(&w64); > + if (!value && s->pci_hole64_fix) { > + value = pc_pci_hole64_start(); > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -256,11 +261,15 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, > Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; > + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { > + value = PCI_HOST_PCI_HOLE64_END; > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -857,6 +866,7 @@ static Property i440fx_props[] = { > DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, > pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), > DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), > + DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), > DEFINE_PROP_END_OF_LIST(), > }; > > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c > index 1ff648e80c..641213f177 100644 > --- a/hw/pci-host/q35.c > +++ b/hw/pci-host/q35.c > @@ -104,11 +104,15 @@ static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v, > Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + Q35PCIHost *s = Q35_HOST_DEVICE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_lob(&w64); > + if (!value && s->pci_hole64_fix) { > + value = pc_pci_hole64_start(); > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -117,11 +121,15 @@ static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v, > Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + Q35PCIHost *s = Q35_HOST_DEVICE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; > + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { > + value = PCI_HOST_PCI_HOLE64_END; > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -143,6 +151,7 @@ static Property q35_host_props[] = { > mch.below_4g_mem_size, 0), > DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, > mch.above_4g_mem_size, 0), > + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), > DEFINE_PROP_END_OF_LIST(), > }; > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 087d184ef5..2e98e8c943 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -239,6 +239,7 @@ void pc_guest_info_init(PCMachineState *pcms); > #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" > #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" > #define DEFAULT_PCI_HOLE64_SIZE (~0x0ULL) > +#define PCI_HOST_PCI_HOLE64_END (0x1ULL << 40) > > > void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, > @@ -249,6 +250,7 @@ void pc_memory_init(PCMachineState *pcms, > MemoryRegion *system_memory, > MemoryRegion *rom_memory, > MemoryRegion **ram_memory); > +uint64_t pc_pci_hole64_start(void); > qemu_irq pc_allocate_cpu_irq(void); > DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); > void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, > @@ -375,6 +377,14 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); > .driver = TYPE_X86_CPU,\ > .property = "x-hv-max-vps",\ > .value = "0x40",\ > + },{\ > + .driver = "i440FX-pcihost",\ > + .property = "x-pci-hole64-fix",\ > + .value = "false",\ should be "off" > + },{\ > + .driver = "q35-pcihost",\ > + .property = "x-pci-hole64-fix",\ > + .value = "false",\ ditto > }, > > #define PC_COMPAT_2_9 \ > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h > index 58983c00b3..8f4ddde393 100644 > --- a/include/hw/pci-host/q35.h > +++ b/include/hw/pci-host/q35.h > @@ -68,6 +68,7 @@ typedef struct Q35PCIHost { > PCIExpressHost parent_obj; > /*< public >*/ > > + bool pci_hole64_fix; > MCHPCIState mch; > } Q35PCIHost; > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-18 10:45 ` Igor Mammedov @ 2017-10-18 10:57 ` Marcel Apfelbaum 0 siblings, 0 replies; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-18 10:57 UTC (permalink / raw) To: Igor Mammedov; +Cc: qemu-devel, pbonzini, lersek, ehabkost, mst Hi Igor, On 18/10/2017 13:45, Igor Mammedov wrote: > On Wed, 18 Oct 2017 12:58:07 +0300 > Marcel Apfelbaum <marcel@redhat.com> wrote: > >> Currently there is no MMIO range over 4G >> reserved for PCI hotplug. Since the 32bit PCI hole >> depends on the number of cold-plugged PCI devices >> and other factors, it is very possible is too small >> to hotplug PCI devices with large BARs. >> >> Fix it by reserving all the address space after >> RAM (and the reserved space for RAM hotplug), >> but no more than 40bits. The later seems to be >> QEMU's magic number for the Guest physical CPU addressable >> bits and is safe with respect to migration. >> >> Note this is a regression since prev QEMU versions had >> some range reserved for 64bit PCI hotplug. >> >> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> >> --- >> hw/i386/pc.c | 22 ++++++++++++++++++++++ >> hw/pci-host/piix.c | 10 ++++++++++ >> hw/pci-host/q35.c | 9 +++++++++ >> include/hw/i386/pc.h | 10 ++++++++++ >> include/hw/pci-host/q35.h | 1 + >> 5 files changed, 52 insertions(+) >> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index 05985d4927..4df20d2b99 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -1448,6 +1448,28 @@ void pc_memory_init(PCMachineState *pcms, >> pcms->ioapic_as = &address_space_memory; >> } >> >> +uint64_t pc_pci_hole64_start(void) >> +{ >> + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); >> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + uint64_t hole64_start = 0; >> + >> + if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { >> + hole64_start = pcms->hotplug_memory.base; >> + if (!pcmc->broken_reserved_end) { >> + hole64_start += memory_region_size(&pcms->hotplug_memory.mr); >> + } >> + } else { >> + hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; >> + } >> + >> + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { >> + hole64_start = PCI_HOST_PCI_HOLE64_END; >> + } >> + >> + return hole64_start; > maybe align up on 1Gb > I can do that, sure. >> +} >> + >> qemu_irq pc_allocate_cpu_irq(void) >> { >> return qemu_allocate_irq(pic_irq_request, NULL, 0); >> diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c >> index dec345fd24..317a232972 100644 >> --- a/hw/pci-host/piix.c >> +++ b/hw/pci-host/piix.c >> @@ -50,6 +50,7 @@ typedef struct I440FXState { >> PCIHostState parent_obj; >> Range pci_hole; >> uint64_t pci_hole64_size; >> + bool pci_hole64_fix; >> uint32_t short_root_bus; >> } I440FXState; >> >> @@ -243,11 +244,15 @@ static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v, >> void *opaque, Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_lob(&w64); >> + if (!value && s->pci_hole64_fix) { >> + value = pc_pci_hole64_start(); >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -256,11 +261,15 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, >> Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; >> + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { >> + value = PCI_HOST_PCI_HOLE64_END; >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -857,6 +866,7 @@ static Property i440fx_props[] = { >> DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, >> pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), >> DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), >> + DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c >> index 1ff648e80c..641213f177 100644 >> --- a/hw/pci-host/q35.c >> +++ b/hw/pci-host/q35.c >> @@ -104,11 +104,15 @@ static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v, >> Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + Q35PCIHost *s = Q35_HOST_DEVICE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_lob(&w64); >> + if (!value && s->pci_hole64_fix) { >> + value = pc_pci_hole64_start(); >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -117,11 +121,15 @@ static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v, >> Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + Q35PCIHost *s = Q35_HOST_DEVICE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; >> + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { >> + value = PCI_HOST_PCI_HOLE64_END; >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -143,6 +151,7 @@ static Property q35_host_props[] = { >> mch.below_4g_mem_size, 0), >> DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, >> mch.above_4g_mem_size, 0), >> + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h >> index 087d184ef5..2e98e8c943 100644 >> --- a/include/hw/i386/pc.h >> +++ b/include/hw/i386/pc.h >> @@ -239,6 +239,7 @@ void pc_guest_info_init(PCMachineState *pcms); >> #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" >> #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" >> #define DEFAULT_PCI_HOLE64_SIZE (~0x0ULL) >> +#define PCI_HOST_PCI_HOLE64_END (0x1ULL << 40) >> >> >> void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, >> @@ -249,6 +250,7 @@ void pc_memory_init(PCMachineState *pcms, >> MemoryRegion *system_memory, >> MemoryRegion *rom_memory, >> MemoryRegion **ram_memory); >> +uint64_t pc_pci_hole64_start(void); >> qemu_irq pc_allocate_cpu_irq(void); >> DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); >> void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, >> @@ -375,6 +377,14 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); >> .driver = TYPE_X86_CPU,\ >> .property = "x-hv-max-vps",\ >> .value = "0x40",\ >> + },{\ >> + .driver = "i440FX-pcihost",\ >> + .property = "x-pci-hole64-fix",\ >> + .value = "false",\ > should be "off" > OK >> + },{\ >> + .driver = "q35-pcihost",\ >> + .property = "x-pci-hole64-fix",\ >> + .value = "false",\ > ditto > OK, Thanks for the review, Marcel >> }, >> >> #define PC_COMPAT_2_9 \ >> diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h >> index 58983c00b3..8f4ddde393 100644 >> --- a/include/hw/pci-host/q35.h >> +++ b/include/hw/pci-host/q35.h >> @@ -68,6 +68,7 @@ typedef struct Q35PCIHost { >> PCIExpressHost parent_obj; >> /*< public >*/ >> >> + bool pci_hole64_fix; >> MCHPCIState mch; >> } Q35PCIHost; >> > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-18 9:58 [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole Marcel Apfelbaum 2017-10-18 10:45 ` Igor Mammedov @ 2017-10-18 11:40 ` Laszlo Ersek 2017-10-19 9:30 ` Marcel Apfelbaum 1 sibling, 1 reply; 19+ messages in thread From: Laszlo Ersek @ 2017-10-18 11:40 UTC (permalink / raw) To: Marcel Apfelbaum, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert, Gerd Hoffmann Hi Marcel, On 10/18/17 11:58, Marcel Apfelbaum wrote: > Currently there is no MMIO range over 4G > reserved for PCI hotplug. Since the 32bit PCI hole > depends on the number of cold-plugged PCI devices > and other factors, it is very possible is too small > to hotplug PCI devices with large BARs. > > Fix it by reserving all the address space after > RAM (and the reserved space for RAM hotplug), > but no more than 40bits. The later seems to be > QEMU's magic number for the Guest physical CPU addressable > bits and is safe with respect to migration. > > Note this is a regression since prev QEMU versions had > some range reserved for 64bit PCI hotplug. > > Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> > --- > hw/i386/pc.c | 22 ++++++++++++++++++++++ > hw/pci-host/piix.c | 10 ++++++++++ > hw/pci-host/q35.c | 9 +++++++++ > include/hw/i386/pc.h | 10 ++++++++++ > include/hw/pci-host/q35.h | 1 + > 5 files changed, 52 insertions(+) - What are the symptoms of the problem? - What QEMU commit regressed the previous functionality? - How exactly is the regression (and this fix) exposed to the guest? I'm confused because semi-recently I've done multiple successful hotplugs, with OVMF, Q35, PCIe root ports, and PCI Express devices having large (64-bit) MMIO BARs. The examples include ivshmem / Linux guest (based on your hints) and vfio-pci (XL710 NIC) / Windows Server 2016 guest. By default, OVMF marks a 32GB area as 64-bit MMIO aperture, for BAR allocation. This area is above the normal memory (plus reservation for hotplug memory, if any). It is also GB-aligned. The aperture size can be changed (currently) with an experimental -fw_cfg switch (the fw_cfg file is called "opt/ovmf/X-PciMmio64Mb", it states the size of the 64-bit MMIO aperture as a decimal number of megabytes). The OVMF logic is described in the message of the following commit: https://github.com/tianocore/edk2/commit/7e5b1b670c382 If the new resource reservation hints are used for PCI Express root ports, then the 64-bit reservations are satisfied (i.e., programmed into the root ports) from the above-mentioned 64-bit aperture. Then -- as I understand it -- QEMU will generate the ACPI payload honoring those reservations too. This means the OS will be aware, and will have room for accepting hotplug. So... I'm not sure what the regression is, and how this patch fixes it. Is this about exposing a 64-bit MMIO aperture for hotplug purposes *without* the reservation hints? Either way, if QEMU now starts (or starts *again* -- I don't know) dictating the base and size of the 64-bit MMIO aperture, then it shouldn't just be exposed to the OS, via ACPI; it should also be exposed to the firmware, via fw_cfg. The "etc/reserved-memory-end" file is exposed already, so that's good. Two more fw_cfg files look necessary, for exposing the 64-bit hole's base and size. The base should be equal to or larger than "etc/reserved-memory-end". The size should replace the current (experimental) "opt/ovmf/X-PciMmio64Mb" fw_cfg file. A separate note about sizing the 64-bit PCI hole. In OVMF, the end of the 64-bit PCI hole determines the highest guest-physical address that the DXE phase might want to access (so that a UEFI driver for a PCI device can read/write a 64-bit BAR placed there). In turn this dictates how the identity-mapping page tables are built for the DXE phase -- the higher the max expected guest-phys address, the more RAM is needed for the page tables. Assuming QEMU exposes a fixed (1ULL << 40) value for the highest (exclusive) address, *and* OVMF is expected to follow QEMU's directions in the placement of the 64-bit PCI hole (via the new fw_cfg files), then OVMF will need the following RAM amount for building the page tables: * with 1GB page support: 3 pages (12 KB) * without 1GB page support: 1027 pages (4108 KB) This doesn't look overly bad -- but I don't think (1ULL << 40) is good enough. (1ULL << 40) corresponds to 1TB, and we want to have more *RAM* than that. The hotplug memory range comes on top, and then we should still have a 64-bit MMIO aperture. Let's say we want to go up to 4TB (1ULL << 42). DXE page table memory needs in OVMF: * with 1GB page support: 9 pages (36 KB) * without 1GB page support: 4105 pages (16420 KB) I don't expect these memory needs to be a problem, but we should be aware of them. Also, how do you plan to integrate this feature with SeaBIOS? Thanks Laszlo > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 05985d4927..4df20d2b99 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -1448,6 +1448,28 @@ void pc_memory_init(PCMachineState *pcms, > pcms->ioapic_as = &address_space_memory; > } > > +uint64_t pc_pci_hole64_start(void) > +{ > + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); > + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + uint64_t hole64_start = 0; > + > + if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { > + hole64_start = pcms->hotplug_memory.base; > + if (!pcmc->broken_reserved_end) { > + hole64_start += memory_region_size(&pcms->hotplug_memory.mr); > + } > + } else { > + hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; > + } > + > + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { > + hole64_start = PCI_HOST_PCI_HOLE64_END; > + } > + > + return hole64_start; > +} > + > qemu_irq pc_allocate_cpu_irq(void) > { > return qemu_allocate_irq(pic_irq_request, NULL, 0); > diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c > index dec345fd24..317a232972 100644 > --- a/hw/pci-host/piix.c > +++ b/hw/pci-host/piix.c > @@ -50,6 +50,7 @@ typedef struct I440FXState { > PCIHostState parent_obj; > Range pci_hole; > uint64_t pci_hole64_size; > + bool pci_hole64_fix; > uint32_t short_root_bus; > } I440FXState; > > @@ -243,11 +244,15 @@ static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v, > void *opaque, Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_lob(&w64); > + if (!value && s->pci_hole64_fix) { > + value = pc_pci_hole64_start(); > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -256,11 +261,15 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, > Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; > + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { > + value = PCI_HOST_PCI_HOLE64_END; > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -857,6 +866,7 @@ static Property i440fx_props[] = { > DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, > pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), > DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), > + DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), > DEFINE_PROP_END_OF_LIST(), > }; > > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c > index 1ff648e80c..641213f177 100644 > --- a/hw/pci-host/q35.c > +++ b/hw/pci-host/q35.c > @@ -104,11 +104,15 @@ static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v, > Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + Q35PCIHost *s = Q35_HOST_DEVICE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_lob(&w64); > + if (!value && s->pci_hole64_fix) { > + value = pc_pci_hole64_start(); > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -117,11 +121,15 @@ static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v, > Error **errp) > { > PCIHostState *h = PCI_HOST_BRIDGE(obj); > + Q35PCIHost *s = Q35_HOST_DEVICE(obj); > Range w64; > uint64_t value; > > pci_bus_get_w64_range(h->bus, &w64); > value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; > + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { > + value = PCI_HOST_PCI_HOLE64_END; > + } > visit_type_uint64(v, name, &value, errp); > } > > @@ -143,6 +151,7 @@ static Property q35_host_props[] = { > mch.below_4g_mem_size, 0), > DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, > mch.above_4g_mem_size, 0), > + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), > DEFINE_PROP_END_OF_LIST(), > }; > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 087d184ef5..2e98e8c943 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -239,6 +239,7 @@ void pc_guest_info_init(PCMachineState *pcms); > #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" > #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" > #define DEFAULT_PCI_HOLE64_SIZE (~0x0ULL) > +#define PCI_HOST_PCI_HOLE64_END (0x1ULL << 40) > > > void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, > @@ -249,6 +250,7 @@ void pc_memory_init(PCMachineState *pcms, > MemoryRegion *system_memory, > MemoryRegion *rom_memory, > MemoryRegion **ram_memory); > +uint64_t pc_pci_hole64_start(void); > qemu_irq pc_allocate_cpu_irq(void); > DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); > void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, > @@ -375,6 +377,14 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); > .driver = TYPE_X86_CPU,\ > .property = "x-hv-max-vps",\ > .value = "0x40",\ > + },{\ > + .driver = "i440FX-pcihost",\ > + .property = "x-pci-hole64-fix",\ > + .value = "false",\ > + },{\ > + .driver = "q35-pcihost",\ > + .property = "x-pci-hole64-fix",\ > + .value = "false",\ > }, > > #define PC_COMPAT_2_9 \ > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h > index 58983c00b3..8f4ddde393 100644 > --- a/include/hw/pci-host/q35.h > +++ b/include/hw/pci-host/q35.h > @@ -68,6 +68,7 @@ typedef struct Q35PCIHost { > PCIExpressHost parent_obj; > /*< public >*/ > > + bool pci_hole64_fix; > MCHPCIState mch; > } Q35PCIHost; > > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-18 11:40 ` Laszlo Ersek @ 2017-10-19 9:30 ` Marcel Apfelbaum 2017-10-19 10:41 ` Laszlo Ersek 2017-10-19 13:03 ` Gerd Hoffmann 0 siblings, 2 replies; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-19 9:30 UTC (permalink / raw) To: Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert, Gerd Hoffmann Hi Laszlo, On 18/10/2017 14:40, Laszlo Ersek wrote: > Hi Marcel, > > On 10/18/17 11:58, Marcel Apfelbaum wrote: >> Currently there is no MMIO range over 4G >> reserved for PCI hotplug. Since the 32bit PCI hole >> depends on the number of cold-plugged PCI devices >> and other factors, it is very possible is too small >> to hotplug PCI devices with large BARs. >> >> Fix it by reserving all the address space after >> RAM (and the reserved space for RAM hotplug), >> but no more than 40bits. The later seems to be >> QEMU's magic number for the Guest physical CPU addressable >> bits and is safe with respect to migration. >> >> Note this is a regression since prev QEMU versions had >> some range reserved for 64bit PCI hotplug. >> >> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> >> --- >> hw/i386/pc.c | 22 ++++++++++++++++++++++ >> hw/pci-host/piix.c | 10 ++++++++++ >> hw/pci-host/q35.c | 9 +++++++++ >> include/hw/i386/pc.h | 10 ++++++++++ >> include/hw/pci-host/q35.h | 1 + >> 5 files changed, 52 insertions(+) > > - What are the symptoms of the problem? > PCI devices with *relatively* large BARs can't be hot-plugged. The reason is QEMU does not reserve MMIO range over 4G and uses whatever remains on the 32bit PCI window. If you coldplug enough PCI devices you will end up with with a 64bit PCI window that covers only the cold-plugged PCI devices, still no room for hotplug. > - What QEMU commit regressed the previous functionality? > - commit 39848901818 pc: limit 64 bit hole to 2G by default shows us QEMU had the 64bit PCI hole, so it is a regression. - commit 2028fdf3791 piix: use 64 bit window programmed by guest leaves no room for PCI hotplug > - How exactly is the regression (and this fix) exposed to the guest? > The nuance is both above commits computed the actual MMIO range used by cold-plugged devices, but as side effect they influenced the remaining PCI window for hot-plugged devices. What the guest sees is the CRS MMIO range over 4G. > I'm confused because semi-recently I've done multiple successful > hotplugs, with OVMF, Q35, PCIe root ports, and PCI Express devices > having large (64-bit) MMIO BARs. The examples include ivshmem / Linux > guest (based on your hints) and vfio-pci (XL710 NIC) / Windows Server > 2016 guest. Cool! These are actually great news! Back to the issue in hand, a few things: 1. It works with Q35 only, but not with the I440FX machines. 2. The hints are used for the PCIe root ports, meaning for PCI bridges and addresses a slightly different issues: leave hotplug space for secondary buses, while this patch tackles hotplug space for root buses. > > By default, OVMF marks a 32GB area as 64-bit MMIO aperture, for BAR > allocation. This area is above the normal memory (plus reservation for > hotplug memory, if any). It is also GB-aligned. The aperture size can be > changed (currently) with an experimental -fw_cfg switch (the fw_cfg file > is called "opt/ovmf/X-PciMmio64Mb", it states the size of the 64-bit > MMIO aperture as a decimal number of megabytes). I'll try to explain how I see the "picture": CRS is created by QEMU and computed *after* the firmware finishes the PCI BARs allocations. Then *QEMU* decides how big it will be the 64bit PCI hole, not the firmware - see the past commits. So IMHO you don't need the opt/ovmf/X-PciMmio64Mb since the OVMF is not deciding this value. It actually should not care at all. > The OVMF logic is described in the message of the following commit: > > https://github.com/tianocore/edk2/commit/7e5b1b670c382 > > If the new resource reservation hints are used for PCI Express root > ports, then the 64-bit reservations are satisfied (i.e., programmed into > the root ports) from the above-mentioned 64-bit aperture. Then -- as I > understand it -- QEMU will generate the ACPI payload honoring those > reservations too. This means the OS will be aware, and will have room > for accepting hotplug. > > So... I'm not sure what the regression is, and how this patch fixes it. > I hope it is more clear now. > Is this about exposing a 64-bit MMIO aperture for hotplug purposes > *without* the reservation hints? > Again, the hints deal with secondary buses, this one with root buses. > > Either way, if QEMU now starts (or starts *again* -- I don't know) > dictating the base and size of the 64-bit MMIO aperture, then it > shouldn't just be exposed to the OS, via ACPI; it should also be exposed > to the firmware, via fw_cfg. The "etc/reserved-memory-end" file is > exposed already, so that's good. Two more fw_cfg files look necessary, > for exposing the 64-bit hole's base and size. The base should be equal > to or larger than "etc/reserved-memory-end". The size should replace the > current (experimental) "opt/ovmf/X-PciMmio64Mb" fw_cfg file. > Well, the PCI hotplug window may be handled by QEMU since it does not affect the firmware at all; it seems out of scope. See the above patches, QEMU always decided the hotplug windows, it does it for the memory and it has done it for the PCI hotplug window, I don't see something we have to gain from exposing it to the firmware. > > A separate note about sizing the 64-bit PCI hole. In OVMF, the end of > the 64-bit PCI hole determines the highest guest-physical address that > the DXE phase might want to access (so that a UEFI driver for a PCI > device can read/write a 64-bit BAR placed there). In turn this dictates > how the identity-mapping page tables are built for the DXE phase -- the > higher the max expected guest-phys address, the more RAM is needed for > the page tables. > > Assuming QEMU exposes a fixed (1ULL << 40) value for the highest > (exclusive) address, *and* OVMF is expected to follow QEMU's directions > in the placement of the 64-bit PCI hole (via the new fw_cfg files), then > OVMF will need the following RAM amount for building the page tables: > > * with 1GB page support: 3 pages (12 KB) > * without 1GB page support: 1027 pages (4108 KB) > > This doesn't look overly bad -- but I don't think (1ULL << 40) is good > enough. (1ULL << 40) corresponds to 1TB, and we want to have more *RAM* > than that. The hotplug memory range comes on top, and then we should > still have a 64-bit MMIO aperture. > > Let's say we want to go up to 4TB (1ULL << 42). DXE page table memory > needs in OVMF: > > * with 1GB page support: 9 pages (36 KB) > * without 1GB page support: 4105 pages (16420 KB) > > I don't expect these memory needs to be a problem, but we should be > aware of them. > QEMU's default value for CPU phys addressable bits is 40, and is used even is if you add -cpu host to command line and your cpu works with 42 phys bits or more (funny thing, if you set 40 bits phys on command line you get a warning, but the default is 40...) The value is safe for migration, we don't want to limit migration because of the PCI hotplug window, and in the same time we want a usable window. It seemed like a good trade-off. > > Also, how do you plan to integrate this feature with SeaBIOS? > Simple, no need :) . It just works as it should. SeaBIOS computes everything, *then* QEMU creates the CRs ranges based on firmware input and other properties. I think it works also with OVMF out of the box. A last note, when pxb/pxb-pcies will became hot-pluggable we need to leave some bits for them too. I wouldn't really want to add fw_config files for them too, or use a complicate fw_config, or rewrite the logic for all firmware if we don't have to. 64bit PCI window starts from memory(+reserved) end and finishes at 40bit. If you have 1T of RAM you get nothing for PCI hotplug, seems fair. When guests with more then 1T of RAM will become frequent, I suppose the QEMU's default CPU addressable bits will change and we will change it too. Thanks, Marcel > Thanks > Laszlo > >> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index 05985d4927..4df20d2b99 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -1448,6 +1448,28 @@ void pc_memory_init(PCMachineState *pcms, >> pcms->ioapic_as = &address_space_memory; >> } >> >> +uint64_t pc_pci_hole64_start(void) >> +{ >> + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); >> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + uint64_t hole64_start = 0; >> + >> + if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { >> + hole64_start = pcms->hotplug_memory.base; >> + if (!pcmc->broken_reserved_end) { >> + hole64_start += memory_region_size(&pcms->hotplug_memory.mr); >> + } >> + } else { >> + hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; >> + } >> + >> + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { >> + hole64_start = PCI_HOST_PCI_HOLE64_END; >> + } >> + >> + return hole64_start; >> +} >> + >> qemu_irq pc_allocate_cpu_irq(void) >> { >> return qemu_allocate_irq(pic_irq_request, NULL, 0); >> diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c >> index dec345fd24..317a232972 100644 >> --- a/hw/pci-host/piix.c >> +++ b/hw/pci-host/piix.c >> @@ -50,6 +50,7 @@ typedef struct I440FXState { >> PCIHostState parent_obj; >> Range pci_hole; >> uint64_t pci_hole64_size; >> + bool pci_hole64_fix; >> uint32_t short_root_bus; >> } I440FXState; >> >> @@ -243,11 +244,15 @@ static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v, >> void *opaque, Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_lob(&w64); >> + if (!value && s->pci_hole64_fix) { >> + value = pc_pci_hole64_start(); >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -256,11 +261,15 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, >> Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; >> + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { >> + value = PCI_HOST_PCI_HOLE64_END; >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -857,6 +866,7 @@ static Property i440fx_props[] = { >> DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, >> pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), >> DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), >> + DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c >> index 1ff648e80c..641213f177 100644 >> --- a/hw/pci-host/q35.c >> +++ b/hw/pci-host/q35.c >> @@ -104,11 +104,15 @@ static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v, >> Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + Q35PCIHost *s = Q35_HOST_DEVICE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_lob(&w64); >> + if (!value && s->pci_hole64_fix) { >> + value = pc_pci_hole64_start(); >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -117,11 +121,15 @@ static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v, >> Error **errp) >> { >> PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + Q35PCIHost *s = Q35_HOST_DEVICE(obj); >> Range w64; >> uint64_t value; >> >> pci_bus_get_w64_range(h->bus, &w64); >> value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; >> + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { >> + value = PCI_HOST_PCI_HOLE64_END; >> + } >> visit_type_uint64(v, name, &value, errp); >> } >> >> @@ -143,6 +151,7 @@ static Property q35_host_props[] = { >> mch.below_4g_mem_size, 0), >> DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, >> mch.above_4g_mem_size, 0), >> + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h >> index 087d184ef5..2e98e8c943 100644 >> --- a/include/hw/i386/pc.h >> +++ b/include/hw/i386/pc.h >> @@ -239,6 +239,7 @@ void pc_guest_info_init(PCMachineState *pcms); >> #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" >> #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" >> #define DEFAULT_PCI_HOLE64_SIZE (~0x0ULL) >> +#define PCI_HOST_PCI_HOLE64_END (0x1ULL << 40) >> >> >> void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, >> @@ -249,6 +250,7 @@ void pc_memory_init(PCMachineState *pcms, >> MemoryRegion *system_memory, >> MemoryRegion *rom_memory, >> MemoryRegion **ram_memory); >> +uint64_t pc_pci_hole64_start(void); >> qemu_irq pc_allocate_cpu_irq(void); >> DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); >> void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, >> @@ -375,6 +377,14 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); >> .driver = TYPE_X86_CPU,\ >> .property = "x-hv-max-vps",\ >> .value = "0x40",\ >> + },{\ >> + .driver = "i440FX-pcihost",\ >> + .property = "x-pci-hole64-fix",\ >> + .value = "false",\ >> + },{\ >> + .driver = "q35-pcihost",\ >> + .property = "x-pci-hole64-fix",\ >> + .value = "false",\ >> }, >> >> #define PC_COMPAT_2_9 \ >> diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h >> index 58983c00b3..8f4ddde393 100644 >> --- a/include/hw/pci-host/q35.h >> +++ b/include/hw/pci-host/q35.h >> @@ -68,6 +68,7 @@ typedef struct Q35PCIHost { >> PCIExpressHost parent_obj; >> /*< public >*/ >> >> + bool pci_hole64_fix; >> MCHPCIState mch; >> } Q35PCIHost; >> >> > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-19 9:30 ` Marcel Apfelbaum @ 2017-10-19 10:41 ` Laszlo Ersek 2017-10-19 11:29 ` Marcel Apfelbaum 2017-10-19 13:03 ` Gerd Hoffmann 1 sibling, 1 reply; 19+ messages in thread From: Laszlo Ersek @ 2017-10-19 10:41 UTC (permalink / raw) To: Marcel Apfelbaum, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert, Gerd Hoffmann On 10/19/17 11:30, Marcel Apfelbaum wrote: > > Hi Laszlo, > > On 18/10/2017 14:40, Laszlo Ersek wrote: >> Hi Marcel, >> >> On 10/18/17 11:58, Marcel Apfelbaum wrote: >>> Currently there is no MMIO range over 4G >>> reserved for PCI hotplug. Since the 32bit PCI hole >>> depends on the number of cold-plugged PCI devices >>> and other factors, it is very possible is too small >>> to hotplug PCI devices with large BARs. >>> >>> Fix it by reserving all the address space after >>> RAM (and the reserved space for RAM hotplug), >>> but no more than 40bits. The later seems to be >>> QEMU's magic number for the Guest physical CPU addressable >>> bits and is safe with respect to migration. >>> >>> Note this is a regression since prev QEMU versions had >>> some range reserved for 64bit PCI hotplug. >>> >>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> >>> --- >>> hw/i386/pc.c | 22 ++++++++++++++++++++++ >>> hw/pci-host/piix.c | 10 ++++++++++ >>> hw/pci-host/q35.c | 9 +++++++++ >>> include/hw/i386/pc.h | 10 ++++++++++ >>> include/hw/pci-host/q35.h | 1 + >>> 5 files changed, 52 insertions(+) >> >> - What are the symptoms of the problem? >> > > PCI devices with *relatively* large BARs can't be hot-plugged. > The reason is QEMU does not reserve MMIO range over 4G and > uses whatever remains on the 32bit PCI window. > > If you coldplug enough PCI devices you will end up with > with a 64bit PCI window that covers only the cold-plugged PCI > devices, still no room for hotplug. > >> - What QEMU commit regressed the previous functionality? >> > > - commit 39848901818 pc: limit 64 bit hole to 2G by default > shows us QEMU had the 64bit PCI hole, so it is a regression. (Side remark: this commit was part of release v1.6.0, and at that time we also had the "etc/pci-info" fw_cfg file.) > > - commit 2028fdf3791 piix: use 64 bit window programmed by guest > leaves no room for PCI hotplug (Side remark: part of v1.7.0, from 2013. So, we can call this a regression, but then it's a very old one. Of course this is *not* a counter-argument to your patch, or commit message wording; it's just that I'm surprised that the issue could remain dormant this long -- usually users report regressions very quickly.) > >> - How exactly is the regression (and this fix) exposed to the guest? >> > > The nuance is both above commits computed the actual MMIO > range used by cold-plugged devices, but as side effect > they influenced the remaining PCI window for hot-plugged devices. > > What the guest sees is the CRS MMIO range over 4G. OK, understood. > >> I'm confused because semi-recently I've done multiple successful >> hotplugs, with OVMF, Q35, PCIe root ports, and PCI Express devices >> having large (64-bit) MMIO BARs. The examples include ivshmem / Linux >> guest (based on your hints) and vfio-pci (XL710 NIC) / Windows Server >> 2016 guest. > > Cool! These are actually great news! > Back to the issue in hand, a few things: > 1. It works with Q35 only, but not with the I440FX machines. OK. > 2. The hints are used for the PCIe root ports, meaning for PCI bridges > and addresses a slightly different issues: leave hotplug space > for secondary buses, while this patch tackles hotplug > space for root buses. OK. > >> >> By default, OVMF marks a 32GB area as 64-bit MMIO aperture, for BAR >> allocation. This area is above the normal memory (plus reservation for >> hotplug memory, if any). It is also GB-aligned. The aperture size can be >> changed (currently) with an experimental -fw_cfg switch (the fw_cfg file >> is called "opt/ovmf/X-PciMmio64Mb", it states the size of the 64-bit >> MMIO aperture as a decimal number of megabytes). > > I'll try to explain how I see the "picture": > CRS is created by QEMU and computed *after* the firmware finishes > the PCI BARs allocations. Yes. > Then *QEMU* decides how big it will be the > 64bit PCI hole, not the firmware - see the past commits. Yes, first the guest-side programming happens, then QEMU (re-)calculates the ACPI payload once the right ACPI fw_cfg file is selected / read. This re-calculation (including the CRS) observes the previously programmed values. > > So IMHO you don't need the opt/ovmf/X-PciMmio64Mb since the OVMF > is not deciding this value. It actually should not care at all. OVMF cannot *not* care. It must internally provide (to other, platform-independent components in edk2) the 64-bit MMIO aperture that the BARs can be allocated from. But maybe I'm totally misunderstanding this patch! Are you saying that the patch makes the following difference only: when the CRS is recalculated (i.e., after the PCI enumeration has completed in the guest, and the ACPI linker/loader fw_cfg files are opened), then you only grow / round up the 64-bit range? That would make sense, because a *superset* aperture exposed to the OS via the _CRS does not invalidate any allocations done by the firmware earlier. Furthermore, the firmware itself need not learn about the details of said superset. In this sense I agree that OVMF should not care at all. [snip] >> Also, how do you plan to integrate this feature with SeaBIOS? (Ever since I sent my previous response, I've been glad that I finished my email with this question -- I really think OVMF and SeaBIOS should behave uniformly in this regard. So I'm happy about your answer:) > Simple, no need :) . It just works as it should. > SeaBIOS computes everything, *then* QEMU creates the CRs ranges > based on firmware input and other properties. > I think it works also with OVMF out of the box. OK, so this explains it all, thank you very much -- my question is if you could spell out those "other properties" in a bit more detail (I apologize if these should be obvious to me already, from the commit message and the patch): (1) How exactly does the enlarged 64-bit hole remain a *superset* of the firmware-programmed BARs? Can you point me to a location in the code? (2) What happens if we do have more than 1TB RAM? > A last note, when pxb/pxb-pcies will became hot-pluggable > we need to leave some bits for them too. I wouldn't > really want to add fw_config files for them too, or > use a complicate fw_config, or rewrite > the logic for all firmware if we don't have to. OK. > > 64bit PCI window starts from memory(+reserved) end > and finishes at 40bit. OK, this seems to (partly) answer my question (1) -- can you please confirm that the finishes at 40bit part does not imply forcefully truncating the CRS at 1TB, in case the firmware already allocated BARs higher than that? > If you have 1T of RAM you > get nothing for PCI hotplug, seems fair. This is fine (as the answer to my question (2)) as long as you mean it in the following sense: "QEMU will simply *not grow* the MMIO in the _CRS past 1TB". > When guests with more then 1T of RAM will become > frequent, I suppose the QEMU's default CPU addressable > bits will change and we will change it too. Thanks Marcel, I feel a lot safer now. Please confirm that the changes can only grow the area exposed in the _CRS -- i.e., only lower or preserve the base, and only raise or preserve the absolute limit. I'll shut up then :) Thanks! Laszlo ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-19 10:41 ` Laszlo Ersek @ 2017-10-19 11:29 ` Marcel Apfelbaum 2017-10-19 11:52 ` Laszlo Ersek 0 siblings, 1 reply; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-19 11:29 UTC (permalink / raw) To: Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert, Gerd Hoffmann Hi Laszlo, On 19/10/2017 13:41, Laszlo Ersek wrote: > On 10/19/17 11:30, Marcel Apfelbaum wrote: >> >> Hi Laszlo, >> >> On 18/10/2017 14:40, Laszlo Ersek wrote: >>> Hi Marcel, >>> >>> On 10/18/17 11:58, Marcel Apfelbaum wrote: >>>> Currently there is no MMIO range over 4G >>>> reserved for PCI hotplug. Since the 32bit PCI hole >>>> depends on the number of cold-plugged PCI devices >>>> and other factors, it is very possible is too small >>>> to hotplug PCI devices with large BARs. >>>> >>>> Fix it by reserving all the address space after >>>> RAM (and the reserved space for RAM hotplug), >>>> but no more than 40bits. The later seems to be >>>> QEMU's magic number for the Guest physical CPU addressable >>>> bits and is safe with respect to migration. >>>> >>>> Note this is a regression since prev QEMU versions had >>>> some range reserved for 64bit PCI hotplug. >>>> >>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> >>>> --- >>>> hw/i386/pc.c | 22 ++++++++++++++++++++++ >>>> hw/pci-host/piix.c | 10 ++++++++++ >>>> hw/pci-host/q35.c | 9 +++++++++ >>>> include/hw/i386/pc.h | 10 ++++++++++ >>>> include/hw/pci-host/q35.h | 1 + >>>> 5 files changed, 52 insertions(+) >>> >>> - What are the symptoms of the problem? >>> >> >> PCI devices with *relatively* large BARs can't be hot-plugged. >> The reason is QEMU does not reserve MMIO range over 4G and >> uses whatever remains on the 32bit PCI window. >> >> If you coldplug enough PCI devices you will end up with >> with a 64bit PCI window that covers only the cold-plugged PCI >> devices, still no room for hotplug. >> >>> - What QEMU commit regressed the previous functionality? >>> >> >> - commit 39848901818 pc: limit 64 bit hole to 2G by default >> shows us QEMU had the 64bit PCI hole, so it is a regression. > > (Side remark: this commit was part of release v1.6.0, and at that time > we also had the "etc/pci-info" fw_cfg file.) > >> >> - commit 2028fdf3791 piix: use 64 bit window programmed by guest >> leaves no room for PCI hotplug > > (Side remark: part of v1.7.0, from 2013. So, we can call this a > regression, but then it's a very old one. > > Of course this is *not* a counter-argument to your patch, or commit > message wording; it's just that I'm surprised that the issue could > remain dormant this long -- usually users report regressions very quickly.) > I also thought I am proposing a new feature and I was pretty amazed I actually fix something that worked before. >> >>> - How exactly is the regression (and this fix) exposed to the guest? >>> >> >> The nuance is both above commits computed the actual MMIO >> range used by cold-plugged devices, but as side effect >> they influenced the remaining PCI window for hot-plugged devices. >> >> What the guest sees is the CRS MMIO range over 4G. > > OK, understood. > >> >>> I'm confused because semi-recently I've done multiple successful >>> hotplugs, with OVMF, Q35, PCIe root ports, and PCI Express devices >>> having large (64-bit) MMIO BARs. The examples include ivshmem / Linux >>> guest (based on your hints) and vfio-pci (XL710 NIC) / Windows Server >>> 2016 guest. >> >> Cool! These are actually great news! >> Back to the issue in hand, a few things: >> 1. It works with Q35 only, but not with the I440FX machines. > > OK. > >> 2. The hints are used for the PCIe root ports, meaning for PCI bridges >> and addresses a slightly different issues: leave hotplug space >> for secondary buses, while this patch tackles hotplug >> space for root buses. > > OK. > >> >>> >>> By default, OVMF marks a 32GB area as 64-bit MMIO aperture, for BAR >>> allocation. This area is above the normal memory (plus reservation for >>> hotplug memory, if any). It is also GB-aligned. The aperture size can be >>> changed (currently) with an experimental -fw_cfg switch (the fw_cfg file >>> is called "opt/ovmf/X-PciMmio64Mb", it states the size of the 64-bit >>> MMIO aperture as a decimal number of megabytes). >> >> I'll try to explain how I see the "picture": >> CRS is created by QEMU and computed *after* the firmware finishes >> the PCI BARs allocations. > > Yes. > >> Then *QEMU* decides how big it will be the >> 64bit PCI hole, not the firmware - see the past commits. > > Yes, first the guest-side programming happens, then QEMU (re-)calculates > the ACPI payload once the right ACPI fw_cfg file is selected / read. > This re-calculation (including the CRS) observes the previously > programmed values. > >> >> So IMHO you don't need the opt/ovmf/X-PciMmio64Mb since the OVMF >> is not deciding this value. It actually should not care at all. > > OVMF cannot *not* care. It must internally provide (to other, > platform-independent components in edk2) the 64-bit MMIO aperture that > the BARs can be allocated from. > > > But maybe I'm totally misunderstanding this patch! Are you saying that > the patch makes the following difference only: when the CRS is > recalculated (i.e., after the PCI enumeration has completed in the > guest, and the ACPI linker/loader fw_cfg files are opened), then you > only grow / round up the 64-bit range? > Exactly. > That would make sense, because a *superset* aperture exposed to the OS > via the _CRS does not invalidate any allocations done by the firmware > earlier. Furthermore, the firmware itself need not learn about the > details of said superset. In this sense I agree that OVMF should not > care at all. > Exactly, the range is a superset. > [snip] > >>> Also, how do you plan to integrate this feature with SeaBIOS? > > (Ever since I sent my previous response, I've been glad that I finished > my email with this question -- I really think OVMF and SeaBIOS should > behave uniformly in this regard. So I'm happy about your answer:) > >> Simple, no need :) . It just works as it should. >> SeaBIOS computes everything, *then* QEMU creates the CRs ranges >> based on firmware input and other properties. >> I think it works also with OVMF out of the box. > > OK, so this explains it all, thank you very much -- my question is if > you could spell out those "other properties" in a bit more detail (I > apologize if these should be obvious to me already, from the commit > message and the patch): > "Other properties" was a vague link to max RAM, and things like that. Nothing specific. > (1) How exactly does the enlarged 64-bit hole remain a *superset* of the > firmware-programmed BARs? Can you point me to a location in the code? > Sure in the patch itself: We first take the firmware computed ranges: pci_bus_get_w64_range(h->bus, &w64); Look at the *get_pci_hole64_start -> If the firmware set it we don't touch. And to the *get_pci_hole64_end -> We only update the value if is smaller than what firmware set. > (2) What happens if we do have more than 1TB RAM? > You get no 64bit "hot-pluggable" reserved range. The code dealing with that is: + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { + hole64_start = PCI_HOST_PCI_HOLE64_END; + } And since the end is hard-coded to PCI_HOST_PCI_HOLE64_END we have a 0 size region which gets discarded. An interesting case is if we have more than 1T RAM and the firmware assigns BARs over that, we are increasing the 64bit window to include some of the RAM... A better approach is to check against the actual hole64 end and not the magic value. I'll send a v3, thanks! >> A last note, when pxb/pxb-pcies will became hot-pluggable >> we need to leave some bits for them too. I wouldn't >> really want to add fw_config files for them too, or >> use a complicate fw_config, or rewrite >> the logic for all firmware if we don't have to. > > OK. > >> >> 64bit PCI window starts from memory(+reserved) end >> and finishes at 40bit. > > OK, this seems to (partly) answer my question (1) -- can you please > confirm that the > > finishes at 40bit > > part does not imply forcefully truncating the CRS at 1TB, in case the > firmware already allocated BARs higher than that? > I confirm: + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { + value = PCI_HOST_PCI_HOLE64_END; + } I touch the value only if is smaller than what the firmware set. >> If you have 1T of RAM you >> get nothing for PCI hotplug, seems fair. > > This is fine (as the answer to my question (2)) as long as you mean it > in the following sense: "QEMU will simply *not grow* the MMIO in the > _CRS past 1TB". > >> When guests with more then 1T of RAM will become >> frequent, I suppose the QEMU's default CPU addressable >> bits will change and we will change it too. > > Thanks Marcel, I feel a lot safer now. > > Please confirm that the changes can only grow the area exposed in the > _CRS -- i.e., only lower or preserve the base, and only raise or > preserve the absolute limit. I'll shut up then :) > Confirmed and thanks for the review! Thanks, Marcel > Thanks! > Laszlo > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-19 11:29 ` Marcel Apfelbaum @ 2017-10-19 11:52 ` Laszlo Ersek 0 siblings, 0 replies; 19+ messages in thread From: Laszlo Ersek @ 2017-10-19 11:52 UTC (permalink / raw) To: Marcel Apfelbaum, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert, Gerd Hoffmann On 10/19/17 13:29, Marcel Apfelbaum wrote: > Hi Laszlo, > > On 19/10/2017 13:41, Laszlo Ersek wrote: >> On 10/19/17 11:30, Marcel Apfelbaum wrote: >>> >>> Hi Laszlo, >>> >>> On 18/10/2017 14:40, Laszlo Ersek wrote: >>>> Hi Marcel, >>>> >>>> On 10/18/17 11:58, Marcel Apfelbaum wrote: >>>>> Currently there is no MMIO range over 4G >>>>> reserved for PCI hotplug. Since the 32bit PCI hole >>>>> depends on the number of cold-plugged PCI devices >>>>> and other factors, it is very possible is too small >>>>> to hotplug PCI devices with large BARs. >>>>> >>>>> Fix it by reserving all the address space after >>>>> RAM (and the reserved space for RAM hotplug), >>>>> but no more than 40bits. The later seems to be >>>>> QEMU's magic number for the Guest physical CPU addressable >>>>> bits and is safe with respect to migration. >>>>> >>>>> Note this is a regression since prev QEMU versions had >>>>> some range reserved for 64bit PCI hotplug. >>>>> >>>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com> >>>>> --- >>>>> hw/i386/pc.c | 22 ++++++++++++++++++++++ >>>>> hw/pci-host/piix.c | 10 ++++++++++ >>>>> hw/pci-host/q35.c | 9 +++++++++ >>>>> include/hw/i386/pc.h | 10 ++++++++++ >>>>> include/hw/pci-host/q35.h | 1 + >>>>> 5 files changed, 52 insertions(+) >>>> >>>> - What are the symptoms of the problem? >>>> >>> >>> PCI devices with *relatively* large BARs can't be hot-plugged. >>> The reason is QEMU does not reserve MMIO range over 4G and >>> uses whatever remains on the 32bit PCI window. >>> >>> If you coldplug enough PCI devices you will end up with >>> with a 64bit PCI window that covers only the cold-plugged PCI >>> devices, still no room for hotplug. >>> >>>> - What QEMU commit regressed the previous functionality? >>>> >>> >>> - commit 39848901818 pc: limit 64 bit hole to 2G by default >>> shows us QEMU had the 64bit PCI hole, so it is a regression. >> >> (Side remark: this commit was part of release v1.6.0, and at that time >> we also had the "etc/pci-info" fw_cfg file.) >> >>> >>> - commit 2028fdf3791 piix: use 64 bit window programmed by guest >>> leaves no room for PCI hotplug >> >> (Side remark: part of v1.7.0, from 2013. So, we can call this a >> regression, but then it's a very old one. >> >> Of course this is *not* a counter-argument to your patch, or commit >> message wording; it's just that I'm surprised that the issue could >> remain dormant this long -- usually users report regressions very >> quickly.) >> > > I also thought I am proposing a new feature and I was pretty amazed > I actually fix something that worked before. > >>> >>>> - How exactly is the regression (and this fix) exposed to the guest? >>>> >>> >>> The nuance is both above commits computed the actual MMIO >>> range used by cold-plugged devices, but as side effect >>> they influenced the remaining PCI window for hot-plugged devices. >>> >>> What the guest sees is the CRS MMIO range over 4G. >> >> OK, understood. >> >>> >>>> I'm confused because semi-recently I've done multiple successful >>>> hotplugs, with OVMF, Q35, PCIe root ports, and PCI Express devices >>>> having large (64-bit) MMIO BARs. The examples include ivshmem / Linux >>>> guest (based on your hints) and vfio-pci (XL710 NIC) / Windows Server >>>> 2016 guest. >>> >>> Cool! These are actually great news! >>> Back to the issue in hand, a few things: >>> 1. It works with Q35 only, but not with the I440FX machines. >> >> OK. >> >>> 2. The hints are used for the PCIe root ports, meaning for PCI bridges >>> and addresses a slightly different issues: leave hotplug space >>> for secondary buses, while this patch tackles hotplug >>> space for root buses. >> >> OK. >> >>> >>>> >>>> By default, OVMF marks a 32GB area as 64-bit MMIO aperture, for BAR >>>> allocation. This area is above the normal memory (plus reservation for >>>> hotplug memory, if any). It is also GB-aligned. The aperture size >>>> can be >>>> changed (currently) with an experimental -fw_cfg switch (the fw_cfg >>>> file >>>> is called "opt/ovmf/X-PciMmio64Mb", it states the size of the 64-bit >>>> MMIO aperture as a decimal number of megabytes). >>> >>> I'll try to explain how I see the "picture": >>> CRS is created by QEMU and computed *after* the firmware finishes >>> the PCI BARs allocations. >> >> Yes. >> >>> Then *QEMU* decides how big it will be the >>> 64bit PCI hole, not the firmware - see the past commits. >> >> Yes, first the guest-side programming happens, then QEMU (re-)calculates >> the ACPI payload once the right ACPI fw_cfg file is selected / read. >> This re-calculation (including the CRS) observes the previously >> programmed values. >> >>> >>> So IMHO you don't need the opt/ovmf/X-PciMmio64Mb since the OVMF >>> is not deciding this value. It actually should not care at all. >> >> OVMF cannot *not* care. It must internally provide (to other, >> platform-independent components in edk2) the 64-bit MMIO aperture that >> the BARs can be allocated from. >> >> >> But maybe I'm totally misunderstanding this patch! Are you saying that >> the patch makes the following difference only: when the CRS is >> recalculated (i.e., after the PCI enumeration has completed in the >> guest, and the ACPI linker/loader fw_cfg files are opened), then you >> only grow / round up the 64-bit range? >> > > Exactly. > >> That would make sense, because a *superset* aperture exposed to the OS >> via the _CRS does not invalidate any allocations done by the firmware >> earlier. Furthermore, the firmware itself need not learn about the >> details of said superset. In this sense I agree that OVMF should not >> care at all. >> > > Exactly, the range is a superset. > >> [snip] >> >>>> Also, how do you plan to integrate this feature with SeaBIOS? >> >> (Ever since I sent my previous response, I've been glad that I finished >> my email with this question -- I really think OVMF and SeaBIOS should >> behave uniformly in this regard. So I'm happy about your answer:) >> >>> Simple, no need :) . It just works as it should. >>> SeaBIOS computes everything, *then* QEMU creates the CRs ranges >>> based on firmware input and other properties. >>> I think it works also with OVMF out of the box. >> >> OK, so this explains it all, thank you very much -- my question is if >> you could spell out those "other properties" in a bit more detail (I >> apologize if these should be obvious to me already, from the commit >> message and the patch): >> > > "Other properties" was a vague link to max RAM, and things like that. > Nothing specific. > >> (1) How exactly does the enlarged 64-bit hole remain a *superset* of the >> firmware-programmed BARs? Can you point me to a location in the code? >> > > Sure in the patch itself: > We first take the firmware computed ranges: > pci_bus_get_w64_range(h->bus, &w64); > Look at the *get_pci_hole64_start -> If the firmware set it we don't > touch. > And to the *get_pci_hole64_end -> We only update the value if is smaller > than what firmware set. > > > >> (2) What happens if we do have more than 1TB RAM? >> > > You get no 64bit "hot-pluggable" reserved range. > The code dealing with that is: > > + if (hole64_start > PCI_HOST_PCI_HOLE64_END) { > + hole64_start = PCI_HOST_PCI_HOLE64_END; > + } > > And since the end is hard-coded to PCI_HOST_PCI_HOLE64_END > we have a 0 size region which gets discarded. > > An interesting case is if we have more than 1T RAM and the > firmware assigns BARs over that, we are increasing the > 64bit window to include some of the RAM... > > A better approach is to check against the actual hole64 > end and not the magic value. > I'll send a v3, thanks! Ah, right; in v1 -- and possibly v2, I haven't looked yet -- hole64_start will be set to PCI_HOST_PCI_HOLE64_END, in preparation for the end to be set the exact same way (resulting in an empty range) -- however, the end won't be modified at all if it's already above PCI_HOST_PCI_HOLE64_END, so we'll end up with a non-empty range that intersects with RAM. Please CC me on v3 too; I feel that I'm now better equipped to comment :) Thanks! Laszlo > >>> A last note, when pxb/pxb-pcies will became hot-pluggable >>> we need to leave some bits for them too. I wouldn't >>> really want to add fw_config files for them too, or >>> use a complicate fw_config, or rewrite >>> the logic for all firmware if we don't have to. >> >> OK. >> >>> >>> 64bit PCI window starts from memory(+reserved) end >>> and finishes at 40bit. >> >> OK, this seems to (partly) answer my question (1) -- can you please >> confirm that the >> >> finishes at 40bit >> >> part does not imply forcefully truncating the CRS at 1TB, in case the >> firmware already allocated BARs higher than that? >> > > I confirm: > > + if (s->pci_hole64_fix && value < PCI_HOST_PCI_HOLE64_END) { > + value = PCI_HOST_PCI_HOLE64_END; > + } > > I touch the value only if is smaller than what the firmware set. > >>> If you have 1T of RAM you >>> get nothing for PCI hotplug, seems fair. >> >> This is fine (as the answer to my question (2)) as long as you mean it >> in the following sense: "QEMU will simply *not grow* the MMIO in the >> _CRS past 1TB". >> >>> When guests with more then 1T of RAM will become >>> frequent, I suppose the QEMU's default CPU addressable >>> bits will change and we will change it too. >> >> Thanks Marcel, I feel a lot safer now. >> >> Please confirm that the changes can only grow the area exposed in the >> _CRS -- i.e., only lower or preserve the base, and only raise or >> preserve the absolute limit. I'll shut up then :) >> > > Confirmed and thanks for the review! > > Thanks, > Marcel > >> Thanks! >> Laszlo >> > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-19 9:30 ` Marcel Apfelbaum 2017-10-19 10:41 ` Laszlo Ersek @ 2017-10-19 13:03 ` Gerd Hoffmann 2017-10-19 13:34 ` Marcel Apfelbaum 1 sibling, 1 reply; 19+ messages in thread From: Gerd Hoffmann @ 2017-10-19 13:03 UTC (permalink / raw) To: Marcel Apfelbaum, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert Hi, > - commit 39848901818 pc: limit 64 bit hole to 2G by default > shows us QEMU had the 64bit PCI hole, so it is a regression. commit message says: <quote> It turns out that some 32 bit windows guests crash if 64 bit PCI hole size is >2G. </quote> Why this suddenly isn't a problem any more? Also: how about just using the existing pci_hole64_size property? cheers, Gerd ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-19 13:03 ` Gerd Hoffmann @ 2017-10-19 13:34 ` Marcel Apfelbaum 2017-10-20 6:55 ` Gerd Hoffmann 0 siblings, 1 reply; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-19 13:34 UTC (permalink / raw) To: Gerd Hoffmann, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert On 19/10/2017 16:03, Gerd Hoffmann wrote: > Hi, > >> - commit 39848901818 pc: limit 64 bit hole to 2G by default >> shows us QEMU had the 64bit PCI hole, so it is a regression. > > commit message says: > > <quote> > It turns out that some 32 bit windows guests crash > if 64 bit PCI hole size is >2G. > </quote> > > Why this suddenly isn't a problem any more? > I suppose it is, so we need a way to turn it "off". > Also: how about just using the existing pci_hole64_size property? > This is how I started, however Eduardo and (and maybe Michael ?) where against letting the upper management software to deal with such a low low level detail. They simply can't take such a decision. This is why the property you mentioned is not ever linked in code anywhere! It is simply not implemented and not used. What about renaming the added compat property from x-pci-hole64-fix to pci-hole64? Users using 32-bit Windows guests having the mentioned issue guests will disable it. Thanks, Marcel > cheers, > Gerd > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-19 13:34 ` Marcel Apfelbaum @ 2017-10-20 6:55 ` Gerd Hoffmann 2017-10-20 9:32 ` Laszlo Ersek 2017-10-20 13:47 ` Marcel Apfelbaum 0 siblings, 2 replies; 19+ messages in thread From: Gerd Hoffmann @ 2017-10-20 6:55 UTC (permalink / raw) To: Marcel Apfelbaum, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert Hi, > > commit message says: > > > > <quote> > > It turns out that some 32 bit windows guests crash > > if 64 bit PCI hole size is >2G. > > </quote> > > > > Why this suddenly isn't a problem any more? > > > > I suppose it is, so we need a way to turn it "off". Or have machine types behave differently, i.e. give q35 a large 64bit hole and leave pc as-is. Devices with that large bars are most likely pci express anyway ... > This is how I started, however Eduardo and (and maybe Michael ?) > where against letting the upper management software to deal with > such a low low level detail. They simply can't take such a decision. > This is why the property you mentioned is not ever linked > in code anywhere! It is simply not implemented and not used. Makes sense. BTW: Is it safe to just assume 40 bits physical is going to work? My workstation: model name : Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz address sizes : 39 bits physical, 48 bits virtual Does this imply ept is limited 39 bits physical too? cheers, Gerd ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-20 6:55 ` Gerd Hoffmann @ 2017-10-20 9:32 ` Laszlo Ersek 2017-10-20 10:59 ` Gerd Hoffmann 2017-10-20 13:47 ` Marcel Apfelbaum 1 sibling, 1 reply; 19+ messages in thread From: Laszlo Ersek @ 2017-10-20 9:32 UTC (permalink / raw) To: Gerd Hoffmann, Marcel Apfelbaum, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert On 10/20/17 08:55, Gerd Hoffmann wrote: > Hi, > >>> commit message says: >>> >>> <quote> >>> It turns out that some 32 bit windows guests crash >>> if 64 bit PCI hole size is >2G. >>> </quote> >>> >>> Why this suddenly isn't a problem any more? >>> >> >> I suppose it is, so we need a way to turn it "off". > > Or have machine types behave differently, i.e. give q35 a large 64bit > hole and leave pc as-is. *If* we make it dependent on machine types at all, then please also make it versioned for Q35. > > Devices with that large bars are most likely pci express anyway ... > >> This is how I started, however Eduardo and (and maybe Michael ?) >> where against letting the upper management software to deal with >> such a low low level detail. They simply can't take such a decision. >> This is why the property you mentioned is not ever linked >> in code anywhere! It is simply not implemented and not used. > > Makes sense. > > BTW: Is it safe to just assume 40 bits physical is going to work? My > workstation: > > model name : Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz > address sizes : 39 bits physical, 48 bits virtual > > Does this imply ept is limited 39 bits physical too? Very good point to raise; "39 bits physical" on your workstation *does* imply that EPT is limited exactly the same way. I had run into this very problem while working on 64GB+ RAM enablement in OVMF. (Back then my laptop had: "address sizes : 36 bits physical, 48 bits virtual".) Thanks, Laszlo ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-20 9:32 ` Laszlo Ersek @ 2017-10-20 10:59 ` Gerd Hoffmann 2017-10-20 14:06 ` Marcel Apfelbaum 0 siblings, 1 reply; 19+ messages in thread From: Gerd Hoffmann @ 2017-10-20 10:59 UTC (permalink / raw) To: Laszlo Ersek, Marcel Apfelbaum, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert On Fri, 2017-10-20 at 11:32 +0200, Laszlo Ersek wrote: > On 10/20/17 08:55, Gerd Hoffmann wrote: > > Hi, > > > > > > commit message says: > > > > > > > > <quote> > > > > It turns out that some 32 bit windows guests crash > > > > if 64 bit PCI hole size is >2G. > > > > </quote> > > > > > > > > Why this suddenly isn't a problem any more? > > > > > > > > > > I suppose it is, so we need a way to turn it "off". > > > > Or have machine types behave differently, i.e. give q35 a large > > 64bit > > hole and leave pc as-is. > > *If* we make it dependent on machine types at all, then please also > make > it versioned for Q35. We probably need that for live migration compatibility anyway. If we add a switch we can add it to both pc and q35 (I think they share most code so little extra cost), but have different defaults depending on machine type. > > BTW: Is it safe to just assume 40 bits physical is going to > > work? My > > workstation: > > > > model name : Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz > > address sizes : 39 bits physical, 48 bits virtual > > > > Does this imply ept is limited 39 bits physical too? > > Very good point to raise; "39 bits physical" on your workstation > *does* > imply that EPT is limited exactly the same way. I had run into this > very > problem while working on 64GB+ RAM enablement in OVMF. (Back then my > laptop had: "address sizes : 36 bits physical, 48 bits virtual".) Ah, right, I remember. This is why we ended up with 32G window 32G aligned above highest memory (or reserved) address. No address above 64G will be used unless you have lots of memory in your virtual machine (in which case your host very likely supports more than 36 address lines). So I guess the options are to play safe and do something simliar on the qemu side, or go figure how much physical address space is available and use that (which should also solve the "what do we with >1TB guests" issue). The later could cause some interesting live migration issues though, when migrating between hosts with different physical address space sizes. So maybe the pci-hole64-size option isn't that bad after all? We could make it default to 2G on pc and 32G on q35 ... cheers, Gerd ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-20 10:59 ` Gerd Hoffmann @ 2017-10-20 14:06 ` Marcel Apfelbaum 0 siblings, 0 replies; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-20 14:06 UTC (permalink / raw) To: Gerd Hoffmann, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert, Laine Stump On 20/10/2017 13:59, Gerd Hoffmann wrote: > On Fri, 2017-10-20 at 11:32 +0200, Laszlo Ersek wrote: >> On 10/20/17 08:55, Gerd Hoffmann wrote: >>> Hi, >>> >>>>> commit message says: >>>>> >>>>> <quote> >>>>> It turns out that some 32 bit windows guests crash >>>>> if 64 bit PCI hole size is >2G. >>>>> </quote> >>>>> >>>>> Why this suddenly isn't a problem any more? >>>>> >>>> >>>> I suppose it is, so we need a way to turn it "off". >>> >>> Or have machine types behave differently, i.e. give q35 a large >>> 64bit >>> hole and leave pc as-is. >> >> *If* we make it dependent on machine types at all, then please also >> make >> it versioned for Q35. > > We probably need that for live migration compatibility anyway. If we > add a switch we can add it to both pc and q35 (I think they share most > code so little extra cost), but have different defaults depending on > machine type. > >>> BTW: Is it safe to just assume 40 bits physical is going to >>> work? My >>> workstation: >>> >>> model name : Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz >>> address sizes : 39 bits physical, 48 bits virtual >>> >>> Does this imply ept is limited 39 bits physical too? >> >> Very good point to raise; "39 bits physical" on your workstation >> *does* >> imply that EPT is limited exactly the same way. I had run into this >> very >> problem while working on 64GB+ RAM enablement in OVMF. (Back then my >> laptop had: "address sizes : 36 bits physical, 48 bits virtual".) > > Ah, right, I remember. This is why we ended up with 32G window 32G > aligned above highest memory (or reserved) address. No address above > 64G will be used unless you have lots of memory in your virtual machine > (in which case your host very likely supports more than 36 address > lines). > > So I guess the options are to play safe and do something simliar on the > qemu side, or go figure how much physical address space is available > and use that (which should also solve the "what do we with >1TB guests" > issue). The later could cause some interesting live migration issues > though, when migrating between hosts with different physical address > space sizes. > > So maybe the pci-hole64-size option isn't that bad after all? We could > make it default to 2G on pc and 32G on q35 ... > I could live with that, as I previously stated I started that way. The property is already there, if anyone needs a bigger 64bit PCI hole, libvirt can set a new value. Adding Laine to give him the opportunity to object :) I'll come up with V3 that actually implements the pci-hole64-size, having *some* 64bit window for PCI hotplug is better than none. Thanks all for the help! Marcel > cheers, > Gerd > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-20 6:55 ` Gerd Hoffmann 2017-10-20 9:32 ` Laszlo Ersek @ 2017-10-20 13:47 ` Marcel Apfelbaum 2017-10-23 5:45 ` Gerd Hoffmann 1 sibling, 1 reply; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-20 13:47 UTC (permalink / raw) To: Gerd Hoffmann, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert Hi Gerd, On 20/10/2017 9:55, Gerd Hoffmann wrote: > Hi, > >>> commit message says: >>> >>> <quote> >>> It turns out that some 32 bit windows guests crash >>> if 64 bit PCI hole size is >2G. >>> </quote> >>> >>> Why this suddenly isn't a problem any more? >>> >> >> I suppose it is, so we need a way to turn it "off". > > Or have machine types behave differently, i.e. give q35 a large 64bit > hole and leave pc as-is. > > Devices with that large bars are most likely pci express anyway ... > Agreed >> This is how I started, however Eduardo and (and maybe Michael ?) >> where against letting the upper management software to deal with >> such a low low level detail. They simply can't take such a decision. >> This is why the property you mentioned is not ever linked >> in code anywhere! It is simply not implemented and not used. > > Makes sense. > > BTW: Is it safe to just assume 40 bits physical is going to work? My > workstation: > > model name : Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz > address sizes : 39 bits physical, 48 bits virtual > > Does this imply ept is limited 39 bits physical too? > The guest will still run with 40 bits physical! (judging the code anyway, I hope I am wrong) Then, the stakes are not so big, the Guest kernel will disregard the 64bit hole since is not CPU addressable and go on. At least Linux does it, but I think Win10 also can leave with that. Thanks, Marcel > cheers, > Gerd > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-20 13:47 ` Marcel Apfelbaum @ 2017-10-23 5:45 ` Gerd Hoffmann 2017-10-23 8:46 ` Marcel Apfelbaum 0 siblings, 1 reply; 19+ messages in thread From: Gerd Hoffmann @ 2017-10-23 5:45 UTC (permalink / raw) To: Marcel Apfelbaum, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert Hi, > The guest will still run with 40 bits physical! > (judging the code anyway, I hope I am wrong) > > Then, the stakes are not so big, the Guest kernel will disregard > the 64bit hole since is not CPU addressable and go on. But then there is no working 64bit hole for hotplug ... cheers, Gerd ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-23 5:45 ` Gerd Hoffmann @ 2017-10-23 8:46 ` Marcel Apfelbaum 2017-10-23 9:16 ` Gerd Hoffmann 0 siblings, 1 reply; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-23 8:46 UTC (permalink / raw) To: Gerd Hoffmann, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert On 23/10/2017 8:45, Gerd Hoffmann wrote: > Hi, > >> The guest will still run with 40 bits physical! >> (judging the code anyway, I hope I am wrong) >> >> Then, the stakes are not so big, the Guest kernel will disregard >> the 64bit hole since is not CPU addressable and go on. > > But then there is no working 64bit hole for hotplug ... > Hosts with old CPUs are not fit for modern PCIe devices having large BARs anyway, no 64bit hole in this case would be OK, I think. Thanks, Marcel > cheers, > Gerd > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-23 8:46 ` Marcel Apfelbaum @ 2017-10-23 9:16 ` Gerd Hoffmann 2017-10-23 9:35 ` Marcel Apfelbaum 0 siblings, 1 reply; 19+ messages in thread From: Gerd Hoffmann @ 2017-10-23 9:16 UTC (permalink / raw) To: Marcel Apfelbaum, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert On Mon, 2017-10-23 at 11:46 +0300, Marcel Apfelbaum wrote: > On 23/10/2017 8:45, Gerd Hoffmann wrote: > > Hi, > > > > > The guest will still run with 40 bits physical! > > > (judging the code anyway, I hope I am wrong) > > > > > > Then, the stakes are not so big, the Guest kernel will disregard > > > the 64bit hole since is not CPU addressable and go on. > > > > But then there is no working 64bit hole for hotplug ... > > > > Hosts with old CPUs are not fit for modern PCIe devices > having large BARs anyway, no 64bit hole in this case would > be OK, I think. Well, my workstation which has only 39 bits physical is kaby lake. If you classify this as "old cpu", what is a new cpu then? cheers, Gerd ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole 2017-10-23 9:16 ` Gerd Hoffmann @ 2017-10-23 9:35 ` Marcel Apfelbaum 0 siblings, 0 replies; 19+ messages in thread From: Marcel Apfelbaum @ 2017-10-23 9:35 UTC (permalink / raw) To: Gerd Hoffmann, Laszlo Ersek, qemu-devel Cc: mst, pbonzini, ehabkost, Igor Mammedov, Dr. David Alan Gilbert On 23/10/2017 12:16, Gerd Hoffmann wrote: > On Mon, 2017-10-23 at 11:46 +0300, Marcel Apfelbaum wrote: >> On 23/10/2017 8:45, Gerd Hoffmann wrote: >>> Hi, >>> >>>> The guest will still run with 40 bits physical! >>>> (judging the code anyway, I hope I am wrong) >>>> >>>> Then, the stakes are not so big, the Guest kernel will disregard >>>> the 64bit hole since is not CPU addressable and go on. >>> >>> But then there is no working 64bit hole for hotplug ... >>> >> >> Hosts with old CPUs are not fit for modern PCIe devices >> having large BARs anyway, no 64bit hole in this case would >> be OK, I think. > > Well, my workstation which has only 39 bits physical is kaby lake. > If you classify this as "old cpu", what is a new cpu then? > Maybe the term "old" was not the best ..., server CPU intended for bigger hosts with a lot of RAM? Anyway, having a 32G PCI hotplug window for Q35 should be OK, for now. Thanks, Marcel > cheers, > Gerd > ^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2017-10-23 9:35 UTC | newest] Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-10-18 9:58 [Qemu-devel] [PATCH] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole Marcel Apfelbaum 2017-10-18 10:45 ` Igor Mammedov 2017-10-18 10:57 ` Marcel Apfelbaum 2017-10-18 11:40 ` Laszlo Ersek 2017-10-19 9:30 ` Marcel Apfelbaum 2017-10-19 10:41 ` Laszlo Ersek 2017-10-19 11:29 ` Marcel Apfelbaum 2017-10-19 11:52 ` Laszlo Ersek 2017-10-19 13:03 ` Gerd Hoffmann 2017-10-19 13:34 ` Marcel Apfelbaum 2017-10-20 6:55 ` Gerd Hoffmann 2017-10-20 9:32 ` Laszlo Ersek 2017-10-20 10:59 ` Gerd Hoffmann 2017-10-20 14:06 ` Marcel Apfelbaum 2017-10-20 13:47 ` Marcel Apfelbaum 2017-10-23 5:45 ` Gerd Hoffmann 2017-10-23 8:46 ` Marcel Apfelbaum 2017-10-23 9:16 ` Gerd Hoffmann 2017-10-23 9:35 ` Marcel Apfelbaum
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.