All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
@ 2019-05-28 20:43 Gerd Hoffmann
  2019-05-29  4:48 ` Marcel Apfelbaum
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2019-05-28 20:43 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Michael S. Tsirkin, Gerd Hoffmann,
	Paolo Bonzini, Igor Mammedov, László Érsek,
	Richard Henderson

This patch changes the handling of the mmconfig area.  Thanks to the
pci(e) expander devices we already have the logic to exclude address
ranges from PCI0._CRS.  We can simply add the mmconfig address range
to the list get it excluded as well.

With that in place we can go with a fixed pci hole which covers the
whole area from the end of (low) ram to the ioapic.

This will make the whole logic alot less fragile.  No matter where the
firmware places the mmconfig xbar, things should work correctly.  The
guest also gets a bit more PCI address space (seabios boot):

    # cat /proc/iomem
    [ ... ]
    7ffdd000-7fffffff : reserved
    80000000-afffffff : PCI Bus 0000:00            <<-- this is new
    b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
      b0000000-bfffffff : reserved
    c0000000-febfffff : PCI Bus 0000:00
      f8000000-fbffffff : 0000:00:01.0
    [ ... ]

So this is a guest visible change.

Cc: László Érsek <lersek@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 hw/i386/acpi-build.c | 14 ++++++++++++++
 hw/pci-host/q35.c    | 31 ++++++++-----------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 0d78d738948c..abb0e0ce9f27 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
     uint8_t tpmppi_version;
 } QEMU_PACKED FwCfgTPMConfig;
 
+static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
+
 static void init_common_fadt_data(Object *o, AcpiFadtData *data)
 {
     uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
@@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
     CrsRangeSet crs_range_set;
     PCMachineState *pcms = PC_MACHINE(machine);
     PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
+    AcpiMcfgInfo mcfg;
     uint32_t nr_mem = machine->ram_slots;
     int root_bus_limit = 0xFF;
     PCIBus *bus = NULL;
@@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         }
     }
 
+    /*
+     * At this point crs_range_set has all the ranges used by pci
+     * busses *other* than PCI0.  These ranges will be excluded from
+     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
+     * too.
+     */
+    if (acpi_get_mcfg(&mcfg)) {
+        crs_range_insert(crs_range_set.mem_ranges,
+                         mcfg.base, mcfg.base + mcfg.size - 1);
+    }
+
     scope = aml_scope("\\_SB.PCI0");
     /* build PCI0._CRS */
     crs = aml_resource_template();
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 960939f5ed3e..72093320befe 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
     object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
                              (Object **) &s->mch.address_space_io,
                              qdev_prop_allow_set_link_before_realize, 0, NULL);
-
-    /* Leave enough space for the biggest MCFG BAR */
-    /* TODO: this matches current bios behaviour, but
-     * it's not a power of two, which means an MTRR
-     * can't cover it exactly.
-     */
-    range_set_bounds(&s->mch.pci_hole,
-            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
-            IO_APIC_DEFAULT_ADDRESS - 1);
 }
 
 static const TypeInfo q35_host_info = {
@@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
     }
     addr = pciexbar & addr_mask;
     pcie_host_mmcfg_update(pehb, enable, addr, length);
-    /* Leave enough space for the MCFG BAR */
-    /*
-     * TODO: this matches current bios behaviour, but it's not a power of two,
-     * which means an MTRR can't cover it exactly.
-     */
-    if (enable) {
-        range_set_bounds(&mch->pci_hole,
-                         addr + length,
-                         IO_APIC_DEFAULT_ADDRESS - 1);
-    } else {
-        range_set_bounds(&mch->pci_hole,
-                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
-                         IO_APIC_DEFAULT_ADDRESS - 1);
-    }
 }
 
 /* PAM */
@@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
     mch_update_pam(mch);
     mch_update_smram(mch);
     mch_update_ext_tseg_mbytes(mch);
+
+    /*
+     * pci hole goes from end-of-low-ram to io-apic.
+     * mmconfig will be excluded by the dsdt builder.
+     */
+    range_set_bounds(&mch->pci_hole,
+                     mch->below_4g_mem_size,
+                     IO_APIC_DEFAULT_ADDRESS - 1);
 }
 
 static int mch_post_load(void *opaque, int version_id)
-- 
2.18.1



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-28 20:43 [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS Gerd Hoffmann
@ 2019-05-29  4:48 ` Marcel Apfelbaum
  2019-05-29  5:01   ` Gerd Hoffmann
  2019-05-29 11:16 ` Igor Mammedov
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Marcel Apfelbaum @ 2019-05-29  4:48 UTC (permalink / raw)
  To: Gerd Hoffmann, qemu-devel
  Cc: Eduardo Habkost, Michael S. Tsirkin, Paolo Bonzini,
	Igor Mammedov, László Érsek, Richard Henderson

Hi Gerd,

On 5/28/19 11:43 PM, Gerd Hoffmann wrote:
> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
>
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
>
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
>
>      # cat /proc/iomem
>      [ ... ]
>      7ffdd000-7fffffff : reserved
>      80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>      b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>        b0000000-bfffffff : reserved
>      c0000000-febfffff : PCI Bus 0000:00
>        f8000000-fbffffff : 0000:00:01.0
>      [ ... ]
>
> So this is a guest visible change.

Looks good to me, but shouldn't we use some compat
property to not affect previous machine versions?


Thanks,
Marcel

> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>   hw/i386/acpi-build.c | 14 ++++++++++++++
>   hw/pci-host/q35.c    | 31 ++++++++-----------------------
>   2 files changed, 22 insertions(+), 23 deletions(-)
>
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>       uint8_t tpmppi_version;
>   } QEMU_PACKED FwCfgTPMConfig;
>   
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>   static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>   {
>       uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>       CrsRangeSet crs_range_set;
>       PCMachineState *pcms = PC_MACHINE(machine);
>       PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>       uint32_t nr_mem = machine->ram_slots;
>       int root_bus_limit = 0xFF;
>       PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>           }
>       }
>   
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>       scope = aml_scope("\\_SB.PCI0");
>       /* build PCI0._CRS */
>       crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>       object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                                (Object **) &s->mch.address_space_io,
>                                qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>   }
>   
>   static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>       }
>       addr = pciexbar & addr_mask;
>       pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>   }
>   
>   /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>       mch_update_pam(mch);
>       mch_update_smram(mch);
>       mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>   }
>   
>   static int mch_post_load(void *opaque, int version_id)



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-29  4:48 ` Marcel Apfelbaum
@ 2019-05-29  5:01   ` Gerd Hoffmann
  2019-05-29  5:11     ` Marcel Apfelbaum
  0 siblings, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2019-05-29  5:01 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	Igor Mammedov, László Érsek, Richard Henderson

On Wed, May 29, 2019 at 07:48:03AM +0300, Marcel Apfelbaum wrote:
> Hi Gerd,
> 
> On 5/28/19 11:43 PM, Gerd Hoffmann wrote:
> > This patch changes the handling of the mmconfig area.  Thanks to the
> > pci(e) expander devices we already have the logic to exclude address
> > ranges from PCI0._CRS.  We can simply add the mmconfig address range
> > to the list get it excluded as well.
> > 
> > With that in place we can go with a fixed pci hole which covers the
> > whole area from the end of (low) ram to the ioapic.
> > 
> > This will make the whole logic alot less fragile.  No matter where the
> > firmware places the mmconfig xbar, things should work correctly.  The
> > guest also gets a bit more PCI address space (seabios boot):
> > 
> >      # cat /proc/iomem
> >      [ ... ]
> >      7ffdd000-7fffffff : reserved
> >      80000000-afffffff : PCI Bus 0000:00            <<-- this is new
> >      b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
> >        b0000000-bfffffff : reserved
> >      c0000000-febfffff : PCI Bus 0000:00
> >        f8000000-fbffffff : 0000:00:01.0
> >      [ ... ]
> > 
> > So this is a guest visible change.
> 
> Looks good to me, but shouldn't we use some compat
> property to not affect previous machine versions?

acpi table changes are typically not versioned, and IIRC the acpi tables
are part of the live migration data stream so the tables will not change
under the feet of the running guest even when it is migrated to another
qemu version.

cheers,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-29  5:01   ` Gerd Hoffmann
@ 2019-05-29  5:11     ` Marcel Apfelbaum
  2019-05-29  9:38       ` Paolo Bonzini
  0 siblings, 1 reply; 13+ messages in thread
From: Marcel Apfelbaum @ 2019-05-29  5:11 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	Igor Mammedov, László Érsek, Richard Henderson

Hi Gerd,

On 5/29/19 8:01 AM, Gerd Hoffmann wrote:
> On Wed, May 29, 2019 at 07:48:03AM +0300, Marcel Apfelbaum wrote:
>> Hi Gerd,
>>
>> On 5/28/19 11:43 PM, Gerd Hoffmann wrote:
>>> This patch changes the handling of the mmconfig area.  Thanks to the
>>> pci(e) expander devices we already have the logic to exclude address
>>> ranges from PCI0._CRS.  We can simply add the mmconfig address range
>>> to the list get it excluded as well.
>>>
>>> With that in place we can go with a fixed pci hole which covers the
>>> whole area from the end of (low) ram to the ioapic.
>>>
>>> This will make the whole logic alot less fragile.  No matter where the
>>> firmware places the mmconfig xbar, things should work correctly.  The
>>> guest also gets a bit more PCI address space (seabios boot):
>>>
>>>       # cat /proc/iomem
>>>       [ ... ]
>>>       7ffdd000-7fffffff : reserved
>>>       80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>>>       b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>>>         b0000000-bfffffff : reserved
>>>       c0000000-febfffff : PCI Bus 0000:00
>>>         f8000000-fbffffff : 0000:00:01.0
>>>       [ ... ]
>>>
>>> So this is a guest visible change.
>> Looks good to me, but shouldn't we use some compat
>> property to not affect previous machine versions?
> acpi table changes are typically not versioned, and IIRC the acpi tables
> are part of the live migration data stream so the tables will not change
> under the feet of the running guest even when it is migrated to another
> qemu version.

I wasn't "worried" only about migration, but also about the visible 
change in
the guests keeping the same machine type and upgrading QEMU.

I am not saying is a "big" issue since it will probably not affect the 
guests at all.
Upgrading QEMU will look like a firmware update or something.

Thanks,
Marcel

> cheers,
>    Gerd
>



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-29  5:11     ` Marcel Apfelbaum
@ 2019-05-29  9:38       ` Paolo Bonzini
  0 siblings, 0 replies; 13+ messages in thread
From: Paolo Bonzini @ 2019-05-29  9:38 UTC (permalink / raw)
  To: Marcel Apfelbaum, Gerd Hoffmann
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Igor Mammedov,
	László Érsek, Richard Henderson

On 29/05/19 07:11, Marcel Apfelbaum wrote:
> I am not saying is a "big" issue since it will probably not affect the
> guests at all.
> Upgrading QEMU will look like a firmware update or something.

Yeah, this happens all the time.  This kind of bugfix is generally not
versioned.

Paolo


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-28 20:43 [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS Gerd Hoffmann
  2019-05-29  4:48 ` Marcel Apfelbaum
@ 2019-05-29 11:16 ` Igor Mammedov
  2019-05-29 13:20   ` Gerd Hoffmann
  2019-05-29 22:05 ` Michael S. Tsirkin
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Igor Mammedov @ 2019-05-29 11:16 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	László Érsek, Richard Henderson

On Tue, 28 May 2019 22:43:31 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.

My impression was that QEMU would/should add into CRS whatever bars
firmware programmed (and it looks like QEMU doesn't do it right).
So I'm not really fond of adding bigger hole just to paper over
existing bug (still might be the way to go). Let me ponder a bit
on it and look into what's isn't working on QEMU side properly.

> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-29 11:16 ` Igor Mammedov
@ 2019-05-29 13:20   ` Gerd Hoffmann
  0 siblings, 0 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2019-05-29 13:20 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	László Érsek, Richard Henderson

On Wed, May 29, 2019 at 01:16:52PM +0200, Igor Mammedov wrote:
> On Tue, 28 May 2019 22:43:31 +0200
> Gerd Hoffmann <kraxel@redhat.com> wrote:
> 
> > This patch changes the handling of the mmconfig area.  Thanks to the
> > pci(e) expander devices we already have the logic to exclude address
> > ranges from PCI0._CRS.  We can simply add the mmconfig address range
> > to the list get it excluded as well.
> > 
> > With that in place we can go with a fixed pci hole which covers the
> > whole area from the end of (low) ram to the ioapic.
> > 
> > This will make the whole logic alot less fragile.  No matter where the
> > firmware places the mmconfig xbar, things should work correctly.  The
> > guest also gets a bit more PCI address space (seabios boot):
> > 
> >     # cat /proc/iomem
> >     [ ... ]
> >     7ffdd000-7fffffff : reserved
> >     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
> >     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
> >       b0000000-bfffffff : reserved
> >     c0000000-febfffff : PCI Bus 0000:00
> >       f8000000-fbffffff : 0000:00:01.0
> >     [ ... ]
> > 
> > So this is a guest visible change.
> 
> My impression was that QEMU would/should add into CRS whatever bars
> firmware programmed (and it looks like QEMU doesn't do it right).

Well, that works reasonable well.  It looks at all pci bars.  The ones
which do not belong to PCI0 are added to their pci(e) expander.  All
remaining address space of the pci hole is added to PCI0.  At times
things look a bit odd as all unused ranges go to PCI0 even in cases like
this one:

[ ... ]
84a00000-85203fff : PCI Bus 0000:40
  84a00000-84bfffff : PCI Bus 0000:44
  84c00000-84dfffff : PCI Bus 0000:43
  84e00000-84ffffff : PCI Bus 0000:42
  85000000-851fffff : PCI Bus 0000:41
  85200000-85200fff : 0000:40:02.3
  85201000-85201fff : 0000:40:02.2
  85202000-85202fff : 0000:40:02.1
  85203000-85203fff : 0000:40:02.0
85204000-853fffff : PCI Bus 0000:00   <<-- this could be given to PCI Bus 0000:40
85400000-85c03fff : PCI Bus 0000:80
  85400000-855fffff : PCI Bus 0000:84
[ ... ]

but that is more or less cosmetical.

> So I'm not really fond of adding bigger hole just to paper over
> existing bug (still might be the way to go). Let me ponder a bit
> on it and look into what's isn't working on QEMU side properly.

Basically qemu assumes the (32bit) pci hole starts above the mmconfig
bar.  The pci hole should start above low memory though, like it does
on 'pc'.  And the mmconfig bar should be excluded.

cheers,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-28 20:43 [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS Gerd Hoffmann
  2019-05-29  4:48 ` Marcel Apfelbaum
  2019-05-29 11:16 ` Igor Mammedov
@ 2019-05-29 22:05 ` Michael S. Tsirkin
  2019-06-06 13:59   ` Michael S. Tsirkin
  2019-05-30  8:58 ` Igor Mammedov
  2019-06-03 10:43 ` Laszlo Ersek
  4 siblings, 1 reply; 13+ messages in thread
From: Michael S. Tsirkin @ 2019-05-29 22:05 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Eduardo Habkost, qemu-devel, Paolo Bonzini, Igor Mammedov,
	László Érsek, Richard Henderson

On Tue, May 28, 2019 at 10:43:31PM +0200, Gerd Hoffmann wrote:
> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.
> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

Hi Gerd!
Please rebase on top of latest pci tree.

After the rebase this will start failing since we
are now asserting on any changes to ACPI tables - and the way to
fix it is to add a comma-separated list of
changed ACPI tables to tests/bios-tables-test-allowed-diff.h

As a maintainer I will notice this and update the expected
files before pushing.

> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)
> -- 
> 2.18.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-28 20:43 [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS Gerd Hoffmann
                   ` (2 preceding siblings ...)
  2019-05-29 22:05 ` Michael S. Tsirkin
@ 2019-05-30  8:58 ` Igor Mammedov
  2019-06-03 10:43 ` Laszlo Ersek
  4 siblings, 0 replies; 13+ messages in thread
From: Igor Mammedov @ 2019-05-30  8:58 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	László Érsek, Richard Henderson

On Tue, 28 May 2019 22:43:31 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.
> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

Reviewed-by: Igor Mammedov <imammedo@redhat.com>

> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-28 20:43 [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS Gerd Hoffmann
                   ` (3 preceding siblings ...)
  2019-05-30  8:58 ` Igor Mammedov
@ 2019-06-03 10:43 ` Laszlo Ersek
  2019-06-03 14:24   ` Gerd Hoffmann
  4 siblings, 1 reply; 13+ messages in thread
From: Laszlo Ersek @ 2019-06-03 10:43 UTC (permalink / raw)
  To: Gerd Hoffmann, qemu-devel
  Cc: Eduardo Habkost, Michael S. Tsirkin, Paolo Bonzini,
	Igor Mammedov, Richard Henderson

On 05/28/19 22:43, Gerd Hoffmann wrote:
> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.
> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)
> 

Looks plausible to me, but I've burned myself with this so frequently
that I'm not comfortable giving a formal feedback tag. I'll defer to
other reviewers (thankfully, there seem to be many, in this instance!)

One question: are we sure all guest OSes we care about can deal with
discontiguous 32-bit PCI MMIO aperture(s)? Personally, I've got no clue.
Is a "naïve" OS imaginable that looks only at the first suitable range
in the _CRS?

Thanks,
Laszlo


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-06-03 10:43 ` Laszlo Ersek
@ 2019-06-03 14:24   ` Gerd Hoffmann
  2019-06-03 16:23     ` Laszlo Ersek
  0 siblings, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2019-06-03 14:24 UTC (permalink / raw)
  To: Laszlo Ersek
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	Igor Mammedov, Richard Henderson

  Hi, 

> One question: are we sure all guest OSes we care about can deal with
> discontiguous 32-bit PCI MMIO aperture(s)? Personally, I've got no clue.
> Is a "naïve" OS imaginable that looks only at the first suitable range
> in the _CRS?

Well, I know there is physical hardware doing the same thing,
my work station for example:

[ ... ]
c8000000-f7ffffff : PCI Bus 0000:00
  c8000000-c81fffff : PCI Bus 0000:01
  e0000000-efffffff : 0000:00:02.0
    e0000000-e02fffff : BOOTFB
[ ... ]
f8000000-fbffffff : PCI MMCONFIG 0000 [bus 00-3f]
  f8000000-fbffffff : Reserved
    f8000000-fbffffff : pnp 00:06
fd000000-fe7fffff : PCI Bus 0000:00
  fd000000-fdabffff : pnp 00:07
[ ... ]

Which of course is no guarantee that no naïve OS exists, but I think the
chances that we'll run into trouble with this are rather small.

cheers,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-06-03 14:24   ` Gerd Hoffmann
@ 2019-06-03 16:23     ` Laszlo Ersek
  0 siblings, 0 replies; 13+ messages in thread
From: Laszlo Ersek @ 2019-06-03 16:23 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Eduardo Habkost, Michael S. Tsirkin, qemu-devel, Paolo Bonzini,
	Igor Mammedov, Richard Henderson

On 06/03/19 16:24, Gerd Hoffmann wrote:
>   Hi, 
> 
>> One question: are we sure all guest OSes we care about can deal with
>> discontiguous 32-bit PCI MMIO aperture(s)? Personally, I've got no clue.
>> Is a "naïve" OS imaginable that looks only at the first suitable range
>> in the _CRS?
> 
> Well, I know there is physical hardware doing the same thing,
> my work station for example:
> 
> [ ... ]
> c8000000-f7ffffff : PCI Bus 0000:00
>   c8000000-c81fffff : PCI Bus 0000:01
>   e0000000-efffffff : 0000:00:02.0
>     e0000000-e02fffff : BOOTFB
> [ ... ]
> f8000000-fbffffff : PCI MMCONFIG 0000 [bus 00-3f]
>   f8000000-fbffffff : Reserved
>     f8000000-fbffffff : pnp 00:06
> fd000000-fe7fffff : PCI Bus 0000:00
>   fd000000-fdabffff : pnp 00:07
> [ ... ]
> 
> Which of course is no guarantee that no naïve OS exists, but I think the
> chances that we'll run into trouble with this are rather small.

OK. Thank you.
Laszlo


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS
  2019-05-29 22:05 ` Michael S. Tsirkin
@ 2019-06-06 13:59   ` Michael S. Tsirkin
  0 siblings, 0 replies; 13+ messages in thread
From: Michael S. Tsirkin @ 2019-06-06 13:59 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Eduardo Habkost, qemu-devel, Paolo Bonzini, Igor Mammedov,
	László Érsek, Richard Henderson

On Wed, May 29, 2019 at 06:05:41PM -0400, Michael S. Tsirkin wrote:
> On Tue, May 28, 2019 at 10:43:31PM +0200, Gerd Hoffmann wrote:
> > This patch changes the handling of the mmconfig area.  Thanks to the
> > pci(e) expander devices we already have the logic to exclude address
> > ranges from PCI0._CRS.  We can simply add the mmconfig address range
> > to the list get it excluded as well.
> > 
> > With that in place we can go with a fixed pci hole which covers the
> > whole area from the end of (low) ram to the ioapic.
> > 
> > This will make the whole logic alot less fragile.  No matter where the
> > firmware places the mmconfig xbar, things should work correctly.  The
> > guest also gets a bit more PCI address space (seabios boot):
> > 
> >     # cat /proc/iomem
> >     [ ... ]
> >     7ffdd000-7fffffff : reserved
> >     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
> >     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
> >       b0000000-bfffffff : reserved
> >     c0000000-febfffff : PCI Bus 0000:00
> >       f8000000-fbffffff : 0000:00:01.0
> >     [ ... ]
> > 
> > So this is a guest visible change.
> > 
> > Cc: László Érsek <lersek@redhat.com>
> > Cc: Igor Mammedov <imammedo@redhat.com>
> > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> 
> Hi Gerd!
> Please rebase on top of latest pci tree.
> 
> After the rebase this will start failing since we
> are now asserting on any changes to ACPI tables - and the way to
> fix it is to add a comma-separated list of
> changed ACPI tables to tests/bios-tables-test-allowed-diff.h
> 
> As a maintainer I will notice this and update the expected
> files before pushing.

ping.

> > ---
> >  hw/i386/acpi-build.c | 14 ++++++++++++++
> >  hw/pci-host/q35.c    | 31 ++++++++-----------------------
> >  2 files changed, 22 insertions(+), 23 deletions(-)
> > 
> > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > index 0d78d738948c..abb0e0ce9f27 100644
> > --- a/hw/i386/acpi-build.c
> > +++ b/hw/i386/acpi-build.c
> > @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
> >      uint8_t tpmppi_version;
> >  } QEMU_PACKED FwCfgTPMConfig;
> >  
> > +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> > +
> >  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
> >  {
> >      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> > @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> >      CrsRangeSet crs_range_set;
> >      PCMachineState *pcms = PC_MACHINE(machine);
> >      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> > +    AcpiMcfgInfo mcfg;
> >      uint32_t nr_mem = machine->ram_slots;
> >      int root_bus_limit = 0xFF;
> >      PCIBus *bus = NULL;
> > @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> >          }
> >      }
> >  
> > +    /*
> > +     * At this point crs_range_set has all the ranges used by pci
> > +     * busses *other* than PCI0.  These ranges will be excluded from
> > +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> > +     * too.
> > +     */
> > +    if (acpi_get_mcfg(&mcfg)) {
> > +        crs_range_insert(crs_range_set.mem_ranges,
> > +                         mcfg.base, mcfg.base + mcfg.size - 1);
> > +    }
> > +
> >      scope = aml_scope("\\_SB.PCI0");
> >      /* build PCI0._CRS */
> >      crs = aml_resource_template();
> > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> > index 960939f5ed3e..72093320befe 100644
> > --- a/hw/pci-host/q35.c
> > +++ b/hw/pci-host/q35.c
> > @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
> >      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
> >                               (Object **) &s->mch.address_space_io,
> >                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> > -
> > -    /* Leave enough space for the biggest MCFG BAR */
> > -    /* TODO: this matches current bios behaviour, but
> > -     * it's not a power of two, which means an MTRR
> > -     * can't cover it exactly.
> > -     */
> > -    range_set_bounds(&s->mch.pci_hole,
> > -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> > -            IO_APIC_DEFAULT_ADDRESS - 1);
> >  }
> >  
> >  static const TypeInfo q35_host_info = {
> > @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
> >      }
> >      addr = pciexbar & addr_mask;
> >      pcie_host_mmcfg_update(pehb, enable, addr, length);
> > -    /* Leave enough space for the MCFG BAR */
> > -    /*
> > -     * TODO: this matches current bios behaviour, but it's not a power of two,
> > -     * which means an MTRR can't cover it exactly.
> > -     */
> > -    if (enable) {
> > -        range_set_bounds(&mch->pci_hole,
> > -                         addr + length,
> > -                         IO_APIC_DEFAULT_ADDRESS - 1);
> > -    } else {
> > -        range_set_bounds(&mch->pci_hole,
> > -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> > -                         IO_APIC_DEFAULT_ADDRESS - 1);
> > -    }
> >  }
> >  
> >  /* PAM */
> > @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
> >      mch_update_pam(mch);
> >      mch_update_smram(mch);
> >      mch_update_ext_tseg_mbytes(mch);
> > +
> > +    /*
> > +     * pci hole goes from end-of-low-ram to io-apic.
> > +     * mmconfig will be excluded by the dsdt builder.
> > +     */
> > +    range_set_bounds(&mch->pci_hole,
> > +                     mch->below_4g_mem_size,
> > +                     IO_APIC_DEFAULT_ADDRESS - 1);
> >  }
> >  
> >  static int mch_post_load(void *opaque, int version_id)
> > -- 
> > 2.18.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2019-06-06 14:00 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-28 20:43 [Qemu-devel] [PATCH] q35: fix mmconfig and PCI0._CRS Gerd Hoffmann
2019-05-29  4:48 ` Marcel Apfelbaum
2019-05-29  5:01   ` Gerd Hoffmann
2019-05-29  5:11     ` Marcel Apfelbaum
2019-05-29  9:38       ` Paolo Bonzini
2019-05-29 11:16 ` Igor Mammedov
2019-05-29 13:20   ` Gerd Hoffmann
2019-05-29 22:05 ` Michael S. Tsirkin
2019-06-06 13:59   ` Michael S. Tsirkin
2019-05-30  8:58 ` Igor Mammedov
2019-06-03 10:43 ` Laszlo Ersek
2019-06-03 14:24   ` Gerd Hoffmann
2019-06-03 16:23     ` Laszlo Ersek

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.