All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled
@ 2021-10-11  8:48 Jan Beulich
  2021-10-11  8:49 ` [PATCH 1/2] VT-d: generalize and correct "iommu=no-igfx" handling Jan Beulich
  2021-10-11  8:49 ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Jan Beulich
  0 siblings, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2021-10-11  8:48 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Ian Jackson

This gets us closer to what Linux does, which hopefully improves
the experience for people running Xen on affected hardware.

Ian - I'm also Cc-ing you since this feels like being on the edge
between a new feature and a bug fix.

1: generalize and correct "iommu=no-igfx" handling
2: Tylersburg isoch DMAR unit with no TLB space

Jan



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] VT-d: generalize and correct "iommu=no-igfx" handling
  2021-10-11  8:48 [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled Jan Beulich
@ 2021-10-11  8:49 ` Jan Beulich
  2021-10-12  3:39   ` Tian, Kevin
  2021-10-11  8:49 ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Jan Beulich
  1 sibling, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2021-10-11  8:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Ian Jackson

Linux'es supposedly equivalent "intel_iommu=igfx_off" deals with any
graphics devices (not just Intel ones) while at the same time limiting
the effect to IOMMUs covering only graphics devices. Keying the decision
to leave translation disabled for an IOMMU to merely a magic SBDF tuple
was wrong in the first place - systems may very well have non-graphics
devices at 0000:00:02.0 (ordinary root ports commonly live there, for
example). Any use of igd_drhd_address (and hence is_igd_drhd()) needs
further qualification.

Introduce a new "graphics only" field in struct acpi_drhd_unit and set
it according to device scope parsing outcome. Replace the bad use of
is_igd_drhd() in iommu_enable_translation() by use of this new field.

While adding the new field also convert the adjacent include_all one to
"bool".

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I assume an implication is that these devices then may not be passed
through to guests, yet I don't see us enforcing this anywhere.

--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1494,8 +1494,8 @@ The following options are specific to In
     version 6 and greater as Registered-Based Invalidation isn't supported
     by them.
 
-*   The `igfx` boolean is active by default, and controls whether the IOMMU in
-    front of an Intel Graphics Device is enabled or not.
+*   The `igfx` boolean is active by default, and controls whether IOMMUs in
+    front of solely graphics devices get enabled or not.
 
     It is intended as a debugging mechanism for graphics issues, and to be
     similar to Linux's `intel_iommu=igfx_off` option.  If specifying `no-igfx`
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -315,6 +315,7 @@ static int __init acpi_parse_dev_scope(
     struct acpi_drhd_unit *drhd = type == DMAR_TYPE ?
         container_of(scope, struct acpi_drhd_unit, scope) : NULL;
     int depth, cnt, didx = 0, ret;
+    bool gfx_only = false;
 
     if ( (cnt = scope_device_count(start, end)) < 0 )
         return cnt;
@@ -324,6 +325,8 @@ static int __init acpi_parse_dev_scope(
         scope->devices = xzalloc_array(u16, cnt);
         if ( !scope->devices )
             return -ENOMEM;
+
+        gfx_only = drhd && !drhd->include_all;
     }
     scope->devices_cnt = cnt;
 
@@ -354,6 +357,7 @@ static int __init acpi_parse_dev_scope(
                        acpi_scope->bus, sec_bus, sub_bus);
 
             dmar_scope_add_buses(scope, sec_bus, sub_bus);
+            gfx_only = false;
             break;
 
         case ACPI_DMAR_SCOPE_TYPE_HPET:
@@ -374,6 +378,8 @@ static int __init acpi_parse_dev_scope(
                 acpi_hpet_unit->dev = path->dev;
                 acpi_hpet_unit->func = path->fn;
                 list_add(&acpi_hpet_unit->list, &drhd->hpet_list);
+
+                gfx_only = false;
             }
 
             break;
@@ -388,6 +394,12 @@ static int __init acpi_parse_dev_scope(
                 if ( (seg == 0) && (bus == 0) && (path->dev == 2) &&
                      (path->fn == 0) )
                     igd_drhd_address = drhd->address;
+
+                if ( gfx_only &&
+                     pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
+                                    PCI_CLASS_DEVICE + 1) != 0x03
+                                    /* PCI_BASE_CLASS_DISPLAY */ )
+                    gfx_only = false;
             }
 
             break;
@@ -408,6 +420,8 @@ static int __init acpi_parse_dev_scope(
                 acpi_ioapic_unit->ioapic.bdf.dev = path->dev;
                 acpi_ioapic_unit->ioapic.bdf.func = path->fn;
                 list_add(&acpi_ioapic_unit->list, &drhd->ioapic_list);
+
+                gfx_only = false;
             }
 
             break;
@@ -417,11 +431,15 @@ static int __init acpi_parse_dev_scope(
                 printk(XENLOG_WARNING VTDPREFIX "Unknown scope type %#x\n",
                        acpi_scope->entry_type);
             start += acpi_scope->length;
+            gfx_only = false;
             continue;
         }
         scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn);
         start += acpi_scope->length;
-   }
+    }
+
+    if ( drhd && gfx_only )
+        drhd->gfx_only = true;
 
     ret = 0;
 
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -62,7 +62,8 @@ struct acpi_drhd_unit {
     struct list_head list;
     u64    address;                     /* register base address of the unit */
     u16    segment;
-    u8     include_all:1;
+    bool   include_all:1;
+    bool   gfx_only:1;
     struct vtd_iommu *iommu;
     struct list_head ioapic_list;
     struct list_head hpet_list;
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -751,7 +751,7 @@ static void iommu_enable_translation(str
     unsigned long flags;
     struct vtd_iommu *iommu = drhd->iommu;
 
-    if ( is_igd_drhd(drhd) )
+    if ( drhd->gfx_only )
     {
         if ( !iommu_igfx )
         {



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space
  2021-10-11  8:48 [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled Jan Beulich
  2021-10-11  8:49 ` [PATCH 1/2] VT-d: generalize and correct "iommu=no-igfx" handling Jan Beulich
@ 2021-10-11  8:49 ` Jan Beulich
  2021-10-11 10:56   ` [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages] Ian Jackson
  2021-10-12  4:40   ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Tian, Kevin
  1 sibling, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2021-10-11  8:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Ian Jackson

BIOSes, when enabling the dedicated DMAR unit for the sound device,
need to also set a non-zero number of TLB entries in a respective
system management register (VTISOCHCTRL). At least one BIOS is known
to fail to do so, causing the VT-d engine to deadlock when used.

Vaguely based on Linux'es e0fc7e0b4b5e ("intel-iommu: Yet another BIOS
workaround: Isoch DMAR unit with no TLB space").

To limit message string redundancy, fold parts with the IGD quirk logic.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: This requires MMCFG availability before Dom0 starts up, which is
     not generally given. We may want/need to e.g. (ab)use the
     .enable_device() hook to actually disable translation if MMCFG
     accesses become available only in the course of Dom0 booting.
RFC: While following Linux in this regard, I'm not convinced of issuing
     the warning about the number of TLB entries when firmware set more
     than 16 (I can observe 20 on the on matching system I have access
     to.)

I assume an implication is that the device in this case then may not be
passed through to guests, but I don't see us enforcing the same anywhere
for graphics devices when "no-igfx" is in use. Yet here I would want to
follow whatever pre-existing model ...

--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -100,6 +100,7 @@ int msi_msg_write_remap_rte(struct msi_d
 int intel_setup_hpet_msi(struct msi_desc *);
 
 int is_igd_vt_enabled_quirk(void);
+bool is_azalia_tlb_enabled(const struct acpi_drhd_unit *);
 void platform_quirks_init(void);
 void vtd_ops_preamble_quirk(struct vtd_iommu *iommu);
 void vtd_ops_postamble_quirk(struct vtd_iommu *iommu);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -750,27 +750,43 @@ static void iommu_enable_translation(str
     u32 sts;
     unsigned long flags;
     struct vtd_iommu *iommu = drhd->iommu;
+    static const char crash_fmt[] = "%s; crash Xen for security purpose\n";
 
     if ( drhd->gfx_only )
     {
+        static const char disable_fmt[] = XENLOG_WARNING VTDPREFIX
+                                          " %s; disabling IGD VT-d engine\n";
+
         if ( !iommu_igfx )
         {
-            printk(XENLOG_INFO VTDPREFIX
-                   "Passed iommu=no-igfx option.  Disabling IGD VT-d engine.\n");
+            printk(disable_fmt, "passed iommu=no-igfx option");
             return;
         }
 
         if ( !is_igd_vt_enabled_quirk() )
         {
+            static const char msg[] = "firmware did not enable IGD for VT properly";
+
             if ( force_iommu )
-                panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose\n");
+                panic(crash_fmt, msg);
 
-            printk(XENLOG_WARNING VTDPREFIX
-                   "BIOS did not enable IGD for VT properly.  Disabling IGD VT-d engine.\n");
+            printk(disable_fmt, msg);
             return;
         }
     }
 
+    if ( !is_azalia_tlb_enabled(drhd) )
+    {
+        static const char msg[] = "firmware did not enable TLB for sound device";
+
+        if ( force_iommu )
+            panic(crash_fmt, msg);
+
+        printk(XENLOG_WARNING VTDPREFIX " %s; disabling ISOCH VT-d engine\n",
+               msg);
+        return;
+    }
+
     /* apply platform specific errata workarounds */
     vtd_ops_preamble_quirk(iommu);
 
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -100,6 +100,69 @@ static void __init cantiga_b3_errata_ini
         is_cantiga_b3 = 1;
 }
 
+/*
+ * QUIRK to work around certain BIOSes enabling the ISOCH DMAR unit for the
+ * Azalia sound device, but not giving it any TLB entries, causing it to
+ * deadlock.
+ */
+bool is_azalia_tlb_enabled(const struct acpi_drhd_unit *drhd)
+{
+    pci_sbdf_t sbdf;
+    unsigned int vtisochctrl;
+
+    /* Only dedicated units are of interest. */
+    if ( drhd->include_all || drhd->scope.devices_cnt != 1 )
+        return true;
+
+    /* Check for the specific device. */
+    sbdf = PCI_SBDF2(drhd->segment, drhd->scope.devices[0]);
+    if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) != PCI_VENDOR_ID_INTEL ||
+         pci_conf_read16(sbdf, PCI_DEVICE_ID) != 0x3a3e )
+        return true;
+
+    /* Check for the corresponding System Management Registers device. */
+    sbdf = PCI_SBDF(drhd->segment, 0, 0x14, 0);
+    if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) != PCI_VENDOR_ID_INTEL ||
+         pci_conf_read16(sbdf, PCI_DEVICE_ID) != 0x342e )
+        return true;
+
+    vtisochctrl = pci_conf_read32(sbdf, 0x188);
+    if ( vtisochctrl == 0xffffffff )
+    {
+        printk(XENLOG_WARNING VTDPREFIX
+               " Cannot access VTISOCHCTRL at this time\n");
+        return true;
+    }
+
+    /*
+     * If Azalia DMA is routed to the non-isoch DMAR unit, that's fine in
+     * principle, but not consistent with the ACPI tables.
+     */
+    if ( vtisochctrl & 1 )
+    {
+        printk(XENLOG_WARNING VTDPREFIX
+               " Inconsistency between chipset registers and ACPI tables\n");
+        return true;
+    }
+
+    /* Drop all bits other than the number of TLB entries. */
+    vtisochctrl &= 0x1c;
+
+    /* If we have the recommended number of TLB entries, fine. */
+    if ( vtisochctrl == 16 )
+        return true;
+
+    /* Zero TLB entries? */
+    if ( !vtisochctrl )
+        return false;
+
+    printk(XENLOG_WARNING VTDPREFIX
+           " Recommended TLB entries for ISOCH unit is 16; firmware set %u\n",
+           vtisochctrl);
+
+    return true;
+}
+
 /* check for Sandybridge IGD device ID's */
 static void __init snb_errata_init(void)
 {



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages]
  2021-10-11  8:49 ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Jan Beulich
@ 2021-10-11 10:56   ` Ian Jackson
  2021-10-11 11:09     ` Jan Beulich
  2021-10-12  4:40   ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Tian, Kevin
  1 sibling, 1 reply; 8+ messages in thread
From: Ian Jackson @ 2021-10-11 10:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Kevin Tian

Jan Beulich writes ("[PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled"):
> Ian - I'm also Cc-ing you since this feels like being on the edge
> between a new feature and a bug fix.

Thanks.

I think 2/ is a new quirk (or, new behaviour for an existing quirk).
I think I am happy to treat that as a bugfix, assuming we are
reasonably confident that most systems (including in particular all
systems without the quirk) will take unchanged codepaths.  Is that
right ?

I don't understand 1/.  It looks bugfixish to me but I am really not
qualified.  I am inclined to defer to your judgement, but it would
help me if you explicitly addressed the overall risks/benefits.

But when reading the patch I did notice one thing that struck me as
undesriable:

> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -750,27 +750,43 @@ static void iommu_enable_translation(str
>              if ( force_iommu )
> -                panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose\n");
> +                panic(crash_fmt, msg);
...
> +        if ( force_iommu )
> +            panic(crash_fmt, msg);

Does this really mean that every exit path from
iommu_enable_translation that doesn't enable the iommu has to have a
check for force_iommu ?

That seems like a recipe for missing one.  And I think a missed one
would be an XSA.  Could we not structure the code some way to avoid
this foreseeable human error ?

Ian.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages]
  2021-10-11 10:56   ` [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages] Ian Jackson
@ 2021-10-11 11:09     ` Jan Beulich
  2021-10-11 13:13       ` Ian Jackson
  0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2021-10-11 11:09 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel, Kevin Tian

On 11.10.2021 12:56, Ian Jackson wrote:
> Jan Beulich writes ("[PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled"):
>> Ian - I'm also Cc-ing you since this feels like being on the edge
>> between a new feature and a bug fix.
> 
> Thanks.
> 
> I think 2/ is a new quirk (or, new behaviour for an existing quirk).
> I think I am happy to treat that as a bugfix, assuming we are
> reasonably confident that most systems (including in particular all
> systems without the quirk) will take unchanged codepaths.  Is that
> right ?

Yes. According to Linux there's exactly one BIOS flavor known to
exhibit the issue.

> I don't understand 1/.  It looks bugfixish to me but I am really not
> qualified.  I am inclined to defer to your judgement, but it would
> help me if you explicitly addressed the overall risks/benefits.

Right now our documentation claims similarity to a Linux workaround
without the similarity actually existing in the general case. A
common case (a single integrated graphics device) is handled, but the
perhaps yet more common case of a single add-in graphics devices is
not. Plus the criteria by which a device is determined to be a
graphics one was completely flawed. Hence people in need of the
workaround may find it non-functional. However, since our doc tells
people to report if they have a need to use the option engaging the
workaround, and since we didn't have any such reports in a number
of years, I guess both benefits and possible risks here are of
purely theoretical nature. Note that I've specifically said "possible"
because I can't really see any beyond me not having properly matched
Linux'es equivalent workaround - that workaround has been in place
unchanged for very many years.

> But when reading the patch I did notice one thing that struck me as
> undesriable:
> 
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -750,27 +750,43 @@ static void iommu_enable_translation(str
>>              if ( force_iommu )
>> -                panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose\n");
>> +                panic(crash_fmt, msg);
> ...
>> +        if ( force_iommu )
>> +            panic(crash_fmt, msg);
> 
> Does this really mean that every exit path from
> iommu_enable_translation that doesn't enable the iommu has to have a
> check for force_iommu ?
> 
> That seems like a recipe for missing one.  And I think a missed one
> would be an XSA.  Could we not structure the code some way to avoid
> this foreseeable human error ?

I'm afraid I don't see a good way to do so, as imo it's desirable to
have separate log messages. IOW something like

    if ( ... )
    {
        msg = "...";
        goto dead;
    }

doesn't look any better to me. Also leaving individual IOMMUs disabled
should really be the exception anyway.

Jan



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages]
  2021-10-11 11:09     ` Jan Beulich
@ 2021-10-11 13:13       ` Ian Jackson
  0 siblings, 0 replies; 8+ messages in thread
From: Ian Jackson @ 2021-10-11 13:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Kevin Tian

Jan Beulich writes ("Re: [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages]"):
> On 11.10.2021 12:56, Ian Jackson wrote:
> > I think 2/ is a new quirk (or, new behaviour for an existing quirk).
> > I think I am happy to treat that as a bugfix, assuming we are
> > reasonably confident that most systems (including in particular all
> > systems without the quirk) will take unchanged codepaths.  Is that
> > right ?
> 
> Yes. According to Linux there's exactly one BIOS flavor known to
> exhibit the issue.
> 
> > I don't understand 1/.  It looks bugfixish to me but I am really not
> > qualified.  I am inclined to defer to your judgement, but it would
> > help me if you explicitly addressed the overall risks/benefits.
> 
> Right now our documentation claims similarity to a Linux workaround
> without the similarity actually existing in the general case. A
> common case (a single integrated graphics device) is handled, but the
> perhaps yet more common case of a single add-in graphics devices is
> not. Plus the criteria by which a device is determined to be a
> graphics one was completely flawed. Hence people in need of the
> workaround may find it non-functional. However, since our doc tells
> people to report if they have a need to use the option engaging the
> workaround, and since we didn't have any such reports in a number
> of years, I guess both benefits and possible risks here are of
> purely theoretical nature. Note that I've specifically said "possible"
> because I can't really see any beyond me not having properly matched
> Linux'es equivalent workaround - that workaround has been in place
> unchanged for very many years.

OK, great.  Thanks for the explanation.  For the record,

Release-Acked-by: Ian Jackson <iwj@xenproject.org>

> > But when reading the patch I did notice one thing that struck me as
> > undesriable:
...
> > That seems like a recipe for missing one.  And I think a missed one
> > would be an XSA.  Could we not structure the code some way to avoid
> > this foreseeable human error ?
> 
> I'm afraid I don't see a good way to do so, as imo it's desirable to
> have separate log messages. IOW something like
> 
>     if ( ... )
>     {
>         msg = "...";
>         goto dead;
>     }
> 
> doesn't look any better to me. Also leaving individual IOMMUs disabled
> should really be the exception anyway.

C does not make this kind of thing easy.  I might be tempted to make
an inner function which returned a const char*, with NULL meaning "it
went OK".  Oh for a proper sum type...

Ian.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 1/2] VT-d: generalize and correct "iommu=no-igfx" handling
  2021-10-11  8:49 ` [PATCH 1/2] VT-d: generalize and correct "iommu=no-igfx" handling Jan Beulich
@ 2021-10-12  3:39   ` Tian, Kevin
  0 siblings, 0 replies; 8+ messages in thread
From: Tian, Kevin @ 2021-10-12  3:39 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Ian Jackson

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, October 11, 2021 4:49 PM
> 
> Linux'es supposedly equivalent "intel_iommu=igfx_off" deals with any
> graphics devices (not just Intel ones) while at the same time limiting
> the effect to IOMMUs covering only graphics devices. Keying the decision
> to leave translation disabled for an IOMMU to merely a magic SBDF tuple
> was wrong in the first place - systems may very well have non-graphics
> devices at 0000:00:02.0 (ordinary root ports commonly live there, for
> example). Any use of igd_drhd_address (and hence is_igd_drhd()) needs
> further qualification.
> 
> Introduce a new "graphics only" field in struct acpi_drhd_unit and set
> it according to device scope parsing outcome. Replace the bad use of
> is_igd_drhd() in iommu_enable_translation() by use of this new field.
> 
> While adding the new field also convert the adjacent include_all one to
> "bool".
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
> I assume an implication is that these devices then may not be passed
> through to guests, yet I don't see us enforcing this anywhere.
> 
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -1494,8 +1494,8 @@ The following options are specific to In
>      version 6 and greater as Registered-Based Invalidation isn't supported
>      by them.
> 
> -*   The `igfx` boolean is active by default, and controls whether the IOMMU
> in
> -    front of an Intel Graphics Device is enabled or not.
> +*   The `igfx` boolean is active by default, and controls whether IOMMUs in
> +    front of solely graphics devices get enabled or not.
> 
>      It is intended as a debugging mechanism for graphics issues, and to be
>      similar to Linux's `intel_iommu=igfx_off` option.  If specifying `no-igfx`
> --- a/xen/drivers/passthrough/vtd/dmar.c
> +++ b/xen/drivers/passthrough/vtd/dmar.c
> @@ -315,6 +315,7 @@ static int __init acpi_parse_dev_scope(
>      struct acpi_drhd_unit *drhd = type == DMAR_TYPE ?
>          container_of(scope, struct acpi_drhd_unit, scope) : NULL;
>      int depth, cnt, didx = 0, ret;
> +    bool gfx_only = false;
> 
>      if ( (cnt = scope_device_count(start, end)) < 0 )
>          return cnt;
> @@ -324,6 +325,8 @@ static int __init acpi_parse_dev_scope(
>          scope->devices = xzalloc_array(u16, cnt);
>          if ( !scope->devices )
>              return -ENOMEM;
> +
> +        gfx_only = drhd && !drhd->include_all;
>      }
>      scope->devices_cnt = cnt;
> 
> @@ -354,6 +357,7 @@ static int __init acpi_parse_dev_scope(
>                         acpi_scope->bus, sec_bus, sub_bus);
> 
>              dmar_scope_add_buses(scope, sec_bus, sub_bus);
> +            gfx_only = false;
>              break;
> 
>          case ACPI_DMAR_SCOPE_TYPE_HPET:
> @@ -374,6 +378,8 @@ static int __init acpi_parse_dev_scope(
>                  acpi_hpet_unit->dev = path->dev;
>                  acpi_hpet_unit->func = path->fn;
>                  list_add(&acpi_hpet_unit->list, &drhd->hpet_list);
> +
> +                gfx_only = false;
>              }
> 
>              break;
> @@ -388,6 +394,12 @@ static int __init acpi_parse_dev_scope(
>                  if ( (seg == 0) && (bus == 0) && (path->dev == 2) &&
>                       (path->fn == 0) )
>                      igd_drhd_address = drhd->address;
> +
> +                if ( gfx_only &&
> +                     pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
> +                                    PCI_CLASS_DEVICE + 1) != 0x03
> +                                    /* PCI_BASE_CLASS_DISPLAY */ )
> +                    gfx_only = false;
>              }
> 
>              break;
> @@ -408,6 +420,8 @@ static int __init acpi_parse_dev_scope(
>                  acpi_ioapic_unit->ioapic.bdf.dev = path->dev;
>                  acpi_ioapic_unit->ioapic.bdf.func = path->fn;
>                  list_add(&acpi_ioapic_unit->list, &drhd->ioapic_list);
> +
> +                gfx_only = false;
>              }
> 
>              break;
> @@ -417,11 +431,15 @@ static int __init acpi_parse_dev_scope(
>                  printk(XENLOG_WARNING VTDPREFIX "Unknown scope type %#x\n",
>                         acpi_scope->entry_type);
>              start += acpi_scope->length;
> +            gfx_only = false;
>              continue;
>          }
>          scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn);
>          start += acpi_scope->length;
> -   }
> +    }
> +
> +    if ( drhd && gfx_only )
> +        drhd->gfx_only = true;
> 
>      ret = 0;
> 
> --- a/xen/drivers/passthrough/vtd/dmar.h
> +++ b/xen/drivers/passthrough/vtd/dmar.h
> @@ -62,7 +62,8 @@ struct acpi_drhd_unit {
>      struct list_head list;
>      u64    address;                     /* register base address of the unit */
>      u16    segment;
> -    u8     include_all:1;
> +    bool   include_all:1;
> +    bool   gfx_only:1;
>      struct vtd_iommu *iommu;
>      struct list_head ioapic_list;
>      struct list_head hpet_list;
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -751,7 +751,7 @@ static void iommu_enable_translation(str
>      unsigned long flags;
>      struct vtd_iommu *iommu = drhd->iommu;
> 
> -    if ( is_igd_drhd(drhd) )
> +    if ( drhd->gfx_only )
>      {
>          if ( !iommu_igfx )
>          {


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space
  2021-10-11  8:49 ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Jan Beulich
  2021-10-11 10:56   ` [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages] Ian Jackson
@ 2021-10-12  4:40   ` Tian, Kevin
  1 sibling, 0 replies; 8+ messages in thread
From: Tian, Kevin @ 2021-10-12  4:40 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Ian Jackson

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, October 11, 2021 4:50 PM
> 
> BIOSes, when enabling the dedicated DMAR unit for the sound device,
> need to also set a non-zero number of TLB entries in a respective
> system management register (VTISOCHCTRL). At least one BIOS is known
> to fail to do so, causing the VT-d engine to deadlock when used.
> 
> Vaguely based on Linux'es e0fc7e0b4b5e ("intel-iommu: Yet another BIOS
> workaround: Isoch DMAR unit with no TLB space").
> 
> To limit message string redundancy, fold parts with the IGD quirk logic.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC: This requires MMCFG availability before Dom0 starts up, which is
>      not generally given. We may want/need to e.g. (ab)use the
>      .enable_device() hook to actually disable translation if MMCFG
>      accesses become available only in the course of Dom0 booting.

make sense

> RFC: While following Linux in this regard, I'm not convinced of issuing
>      the warning about the number of TLB entries when firmware set more
>      than 16 (I can observe 20 on the on matching system I have access
>      to.)

me either. Since you already observed 20 on one system, changing the
check to >=16 makes more sense.

The rest looks good to me:

	Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> 
> I assume an implication is that the device in this case then may not be
> passed through to guests, but I don't see us enforcing the same anywhere
> for graphics devices when "no-igfx" is in use. Yet here I would want to
> follow whatever pre-existing model ...
> 
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -100,6 +100,7 @@ int msi_msg_write_remap_rte(struct msi_d
>  int intel_setup_hpet_msi(struct msi_desc *);
> 
>  int is_igd_vt_enabled_quirk(void);
> +bool is_azalia_tlb_enabled(const struct acpi_drhd_unit *);
>  void platform_quirks_init(void);
>  void vtd_ops_preamble_quirk(struct vtd_iommu *iommu);
>  void vtd_ops_postamble_quirk(struct vtd_iommu *iommu);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -750,27 +750,43 @@ static void iommu_enable_translation(str
>      u32 sts;
>      unsigned long flags;
>      struct vtd_iommu *iommu = drhd->iommu;
> +    static const char crash_fmt[] = "%s; crash Xen for security purpose\n";
> 
>      if ( drhd->gfx_only )
>      {
> +        static const char disable_fmt[] = XENLOG_WARNING VTDPREFIX
> +                                          " %s; disabling IGD VT-d engine\n";
> +
>          if ( !iommu_igfx )
>          {
> -            printk(XENLOG_INFO VTDPREFIX
> -                   "Passed iommu=no-igfx option.  Disabling IGD VT-d engine.\n");
> +            printk(disable_fmt, "passed iommu=no-igfx option");
>              return;
>          }
> 
>          if ( !is_igd_vt_enabled_quirk() )
>          {
> +            static const char msg[] = "firmware did not enable IGD for VT
> properly";
> +
>              if ( force_iommu )
> -                panic("BIOS did not enable IGD for VT properly, crash Xen for
> security purpose\n");
> +                panic(crash_fmt, msg);
> 
> -            printk(XENLOG_WARNING VTDPREFIX
> -                   "BIOS did not enable IGD for VT properly.  Disabling IGD VT-d
> engine.\n");
> +            printk(disable_fmt, msg);
>              return;
>          }
>      }
> 
> +    if ( !is_azalia_tlb_enabled(drhd) )
> +    {
> +        static const char msg[] = "firmware did not enable TLB for sound
> device";
> +
> +        if ( force_iommu )
> +            panic(crash_fmt, msg);
> +
> +        printk(XENLOG_WARNING VTDPREFIX " %s; disabling ISOCH VT-d
> engine\n",
> +               msg);
> +        return;
> +    }
> +
>      /* apply platform specific errata workarounds */
>      vtd_ops_preamble_quirk(iommu);
> 
> --- a/xen/drivers/passthrough/vtd/quirks.c
> +++ b/xen/drivers/passthrough/vtd/quirks.c
> @@ -100,6 +100,69 @@ static void __init cantiga_b3_errata_ini
>          is_cantiga_b3 = 1;
>  }
> 
> +/*
> + * QUIRK to work around certain BIOSes enabling the ISOCH DMAR unit for
> the
> + * Azalia sound device, but not giving it any TLB entries, causing it to
> + * deadlock.
> + */
> +bool is_azalia_tlb_enabled(const struct acpi_drhd_unit *drhd)
> +{
> +    pci_sbdf_t sbdf;
> +    unsigned int vtisochctrl;
> +
> +    /* Only dedicated units are of interest. */
> +    if ( drhd->include_all || drhd->scope.devices_cnt != 1 )
> +        return true;
> +
> +    /* Check for the specific device. */
> +    sbdf = PCI_SBDF2(drhd->segment, drhd->scope.devices[0]);
> +    if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) != PCI_VENDOR_ID_INTEL ||
> +         pci_conf_read16(sbdf, PCI_DEVICE_ID) != 0x3a3e )
> +        return true;
> +
> +    /* Check for the corresponding System Management Registers device. */
> +    sbdf = PCI_SBDF(drhd->segment, 0, 0x14, 0);
> +    if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) != PCI_VENDOR_ID_INTEL ||
> +         pci_conf_read16(sbdf, PCI_DEVICE_ID) != 0x342e )
> +        return true;
> +
> +    vtisochctrl = pci_conf_read32(sbdf, 0x188);
> +    if ( vtisochctrl == 0xffffffff )
> +    {
> +        printk(XENLOG_WARNING VTDPREFIX
> +               " Cannot access VTISOCHCTRL at this time\n");
> +        return true;
> +    }
> +
> +    /*
> +     * If Azalia DMA is routed to the non-isoch DMAR unit, that's fine in
> +     * principle, but not consistent with the ACPI tables.
> +     */
> +    if ( vtisochctrl & 1 )
> +    {
> +        printk(XENLOG_WARNING VTDPREFIX
> +               " Inconsistency between chipset registers and ACPI tables\n");
> +        return true;
> +    }
> +
> +    /* Drop all bits other than the number of TLB entries. */
> +    vtisochctrl &= 0x1c;
> +
> +    /* If we have the recommended number of TLB entries, fine. */
> +    if ( vtisochctrl == 16 )
> +        return true;
> +
> +    /* Zero TLB entries? */
> +    if ( !vtisochctrl )
> +        return false;
> +
> +    printk(XENLOG_WARNING VTDPREFIX
> +           " Recommended TLB entries for ISOCH unit is 16; firmware set %u\n",
> +           vtisochctrl);
> +
> +    return true;
> +}
> +
>  /* check for Sandybridge IGD device ID's */
>  static void __init snb_errata_init(void)
>  {


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-10-12  4:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-11  8:48 [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled Jan Beulich
2021-10-11  8:49 ` [PATCH 1/2] VT-d: generalize and correct "iommu=no-igfx" handling Jan Beulich
2021-10-12  3:39   ` Tian, Kevin
2021-10-11  8:49 ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Jan Beulich
2021-10-11 10:56   ` [PATCH 0/2] VT-d: correct / extend workaround(s) leaving an IOMMU disabled [and 1 more messages] Ian Jackson
2021-10-11 11:09     ` Jan Beulich
2021-10-11 13:13       ` Ian Jackson
2021-10-12  4:40   ` [PATCH 2/2] VT-d: Tylersburg isoch DMAR unit with no TLB space Tian, Kevin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.