All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
@ 2018-04-04 15:29 Chao Gao
  2018-04-04 15:45 ` Roger Pau Monné
  2018-04-05  9:34 ` Roger Pau Monné
  0 siblings, 2 replies; 13+ messages in thread
From: Chao Gao @ 2018-04-04 15:29 UTC (permalink / raw)
  To: xen-devel
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich, Andrew Cooper, Chao Gao

... the same page with other registers which are not relevant to MSI-X. Xen
marks pages where PBA resides as read-only. When assigning such devices to
guest, device driver writes MSI-X irrelevant registers on those pages would
lead to an EPT violation and the guest is destroyed because no handler is
registered for those address range. In order to make guest capable to use such
kind of devices, trapping very frequent write accesses is not a good idea for
it would significantly impact the performance.

This patch provides a workaround with caveat. Specifically, an option is
introduced to specify a list of devices. For those devices, Xen doesn't
control the access right to pages where PBA resides. Hence, guest device
driver is able to write those pages and functions well. Note that adding an
untrusted device to this option may endanger security of the entire system.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
 docs/misc/xen-command-line.markdown | 10 +++++++++
 xen/arch/x86/msi.c                  |  7 ++++--
 xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
 xen/include/asm-x86/msi.h           |  1 +
 4 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index b353352..e382513 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
 
 > Default: `on`
 
+### pba\_quirk
+> `= List of [<seg>:]<bus>:<device>.<function>
+
+Specify a list of SBDF of devices. When assigning devices in this list to
+guest, reading or writing the page where MSI-X PBA resides are allowed.
+This option provides a workaround for nonstandard PCI devices whose
+MSI-X PBA shares the same 4K-byte page with other registers. Note that
+adding an untrusted device to this option would undermine security of
+the entire system.
+
 ### pci
 > `= {no-}serr | {no-}perr`
 
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 5567990..2abf2cf 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -992,7 +992,9 @@ static int msix_capability_init(struct pci_dev *dev,
         if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                 msix->table.last) )
             WARN();
-        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
+
+        if ( !msix->pba_quirk_enabled &&
+             rangeset_add_range(mmio_ro_ranges, msix->pba.first,
                                 msix->pba.last) )
             WARN();
 
@@ -1139,7 +1141,8 @@ static void _pci_cleanup_msix(struct arch_msix *msix)
         if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
                                    msix->table.last) )
             WARN();
-        if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
+        if ( !msix->pba_quirk_enabled &&
+             rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
                                    msix->pba.last) )
             WARN();
     }
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 1db69d5..cd765ef 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -184,6 +184,38 @@ static int __init parse_phantom_dev(const char *str)
 }
 custom_param("pci-phantom", parse_phantom_dev);
 
+static struct pba_quirk_dev {
+    uint32_t sbdf;
+} pba_quirk_devs[8];
+static unsigned int nr_pba_quirk_devs;
+
+static int __init parse_pba_quirk(const char *str)
+{
+    unsigned int seg, bus, dev, func;
+
+    for ( ; ; )
+    {
+        if ( nr_pba_quirk_devs >= ARRAY_SIZE(pba_quirk_devs) )
+            return -E2BIG;
+
+        str = parse_pci(str, &seg, &bus, &dev, &func);
+        if ( !str )
+            return -EINVAL;
+
+        pba_quirk_devs[nr_pba_quirk_devs++].sbdf = PCI_SBDF(seg, bus, dev,
+                                                            func);
+        if ( *str == ',' )
+            str++;
+        else if ( !*str )
+            break;
+        else
+            return -EINVAL;
+    }
+
+    return 0;
+}
+custom_param("pba_quirk", parse_pba_quirk);
+
 static u16 __read_mostly command_mask;
 static u16 __read_mostly bridge_ctl_mask;
 
@@ -300,6 +332,7 @@ static void check_pdev(const struct pci_dev *pdev)
 
 static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
 {
+    unsigned int i;
     struct pci_dev *pdev;
 
     list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
@@ -328,6 +361,16 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
         }
         spin_lock_init(&msix->table_lock);
         pdev->msix = msix;
+
+        for ( i = 0; i < nr_pba_quirk_devs; i++ )
+        {
+            if ( pba_quirk_devs[i].sbdf == PCI_SBDF3(pseg->nr, bus, devfn) )
+            {
+                pdev->msix->pba_quirk_enabled = true;
+                printk(XENLOG_WARNING "Enable PBA quirk for %04x:%02x:%02x.%u\n",
+                       pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+            }
+        }
     }
 
     list_add(&pdev->alldevs_list, &pseg->alldevs_list);
@@ -371,8 +414,6 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
             }
             else
             {
-                unsigned int i;
-
                 for ( i = 0; i < nr_phantom_devs; ++i )
                     if ( phantom_devs[i].seg == pseg->nr &&
                          phantom_devs[i].bus == bus &&
diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
index 10387dc..1645d1b 100644
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -239,6 +239,7 @@ struct arch_msix {
     int table_idx[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
     bool host_maskall, guest_maskall;
+    bool pba_quirk_enabled;
     domid_t warned;
 };
 
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-04 15:29 [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares Chao Gao
@ 2018-04-04 15:45 ` Roger Pau Monné
  2018-04-04 16:20   ` Chao Gao
  2018-04-05  9:34 ` Roger Pau Monné
  1 sibling, 1 reply; 13+ messages in thread
From: Roger Pau Monné @ 2018-04-04 15:45 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
> ... the same page with other registers which are not relevant to MSI-X. Xen
> marks pages where PBA resides as read-only. When assigning such devices to
> guest, device driver writes MSI-X irrelevant registers on those pages would
> lead to an EPT violation and the guest is destroyed because no handler is
> registered for those address range. In order to make guest capable to use such
> kind of devices, trapping very frequent write accesses is not a good idea for
> it would significantly impact the performance.
> 
> This patch provides a workaround with caveat. Specifically, an option is
> introduced to specify a list of devices. For those devices, Xen doesn't
> control the access right to pages where PBA resides. Hence, guest device
> driver is able to write those pages and functions well. Note that adding an
> untrusted device to this option may endanger security of the entire system.

This is a clear violation of the MSI-X spec. Out of curiosity, which
device is it that places random registers in the same page as the PBA?

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-04 15:45 ` Roger Pau Monné
@ 2018-04-04 16:20   ` Chao Gao
  0 siblings, 0 replies; 13+ messages in thread
From: Chao Gao @ 2018-04-04 16:20 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On Wed, Apr 04, 2018 at 04:45:32PM +0100, Roger Pau Monné wrote:
>On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>> ... the same page with other registers which are not relevant to MSI-X. Xen
>> marks pages where PBA resides as read-only. When assigning such devices to
>> guest, device driver writes MSI-X irrelevant registers on those pages would
>> lead to an EPT violation and the guest is destroyed because no handler is
>> registered for those address range. In order to make guest capable to use such
>> kind of devices, trapping very frequent write accesses is not a good idea for
>> it would significantly impact the performance.
>> 
>> This patch provides a workaround with caveat. Specifically, an option is
>> introduced to specify a list of devices. For those devices, Xen doesn't
>> control the access right to pages where PBA resides. Hence, guest device
>> driver is able to write those pages and functions well. Note that adding an
>> untrusted device to this option may endanger security of the entire system.
>
>This is a clear violation of the MSI-X spec. Out of curiosity, which

Yes, that's why we have this patch -- to workaround a hardware issue.

>device is it that places random registers in the same page as the PBA?

According to the commit [1], Mellanox MT27500 series, ConnectX-3 VF.
And, a generation of Intel's Omni-Path.

[1]:https://git.qemu.org/?p=qemu.git;a=commit;h=95239e162518dc6577164be3d9a789aba7f591a3

Could you help to give some comments? :).

Thanks
Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-04 15:29 [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares Chao Gao
  2018-04-04 15:45 ` Roger Pau Monné
@ 2018-04-05  9:34 ` Roger Pau Monné
  2018-04-05  9:40   ` George Dunlap
  2018-04-05 11:00   ` Chao Gao
  1 sibling, 2 replies; 13+ messages in thread
From: Roger Pau Monné @ 2018-04-05  9:34 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
> ... the same page with other registers which are not relevant to MSI-X. Xen
> marks pages where PBA resides as read-only. When assigning such devices to
> guest, device driver writes MSI-X irrelevant registers on those pages would
> lead to an EPT violation and the guest is destroyed because no handler is
> registered for those address range. In order to make guest capable to use such
> kind of devices, trapping very frequent write accesses is not a good idea for
> it would significantly impact the performance.
> 
> This patch provides a workaround with caveat. Specifically, an option is
> introduced to specify a list of devices. For those devices, Xen doesn't
> control the access right to pages where PBA resides. Hence, guest device
> driver is able to write those pages and functions well. Note that adding an
> untrusted device to this option may endanger security of the entire system.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> ---
>  docs/misc/xen-command-line.markdown | 10 +++++++++
>  xen/arch/x86/msi.c                  |  7 ++++--
>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
>  xen/include/asm-x86/msi.h           |  1 +
>  4 files changed, 59 insertions(+), 4 deletions(-)
> 
> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
> index b353352..e382513 100644
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
>  
>  > Default: `on`
>  
> +### pba\_quirk

pba_write_allowed would be better, pba_quirk is too generic IMO.

> +> `= List of [<seg>:]<bus>:<device>.<function>
> +
> +Specify a list of SBDF of devices. When assigning devices in this list to
> +guest, reading or writing the page where MSI-X PBA resides are allowed.
> +This option provides a workaround for nonstandard PCI devices whose
> +MSI-X PBA shares the same 4K-byte page with other registers. Note that
> +adding an untrusted device to this option would undermine security of
> +the entire system.
> +
>  ### pci
>  > `= {no-}serr | {no-}perr`
>  
> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
> index 5567990..2abf2cf 100644
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -992,7 +992,9 @@ static int msix_capability_init(struct pci_dev *dev,
>          if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
>                                  msix->table.last) )
>              WARN();
> -        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
> +
> +        if ( !msix->pba_quirk_enabled &&
> +             rangeset_add_range(mmio_ro_ranges, msix->pba.first,
>                                  msix->pba.last) )
>              WARN();

This will work fine as long as the PBA is not in the same page as the
MSI-X table. In such case you will also need changes to QEMU (see
pci_msix_write), so that writes to the memory in the same page as the
MSI-X/PBA tables are forwarded to the underlying hardware.

You should add least add something like:

if ( msix->pba_quirk_enabled &&
     msix->table.first <= msix->pba.last &&
     msix->pba.first <= msix->table.last )
{
    printk("PBA write not allowed to dev %04x:%02x:%02x.%u due to MSI-X table overlap\n");
    return -ENXIO;
}

Or similar if the QEMU side is not fixed.

Note that in order to fix the QEMU side you would probably have to add
a flag to xl 'pci' config option and pass it to both QEMU and Xen.

>  
> @@ -1139,7 +1141,8 @@ static void _pci_cleanup_msix(struct arch_msix *msix)
>          if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
>                                     msix->table.last) )
>              WARN();
> -        if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
> +        if ( !msix->pba_quirk_enabled &&
> +             rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
>                                     msix->pba.last) )
>              WARN();
>      }
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 1db69d5..cd765ef 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -184,6 +184,38 @@ static int __init parse_phantom_dev(const char *str)
>  }
>  custom_param("pci-phantom", parse_phantom_dev);
>  
> +static struct pba_quirk_dev {
> +    uint32_t sbdf;
> +} pba_quirk_devs[8];

We have a sbdf type now, see 514f58.

Also, I would prefer that you use a list here. I know it's not likely
to have a huge number of devices in the system that require this
quirk, but I also see no reason to limit this to 8 (or any other
arbitrary value).

> +static unsigned int nr_pba_quirk_devs;
> +
> +static int __init parse_pba_quirk(const char *str)
> +{
> +    unsigned int seg, bus, dev, func;
> +
> +    for ( ; ; )
> +    {
> +        if ( nr_pba_quirk_devs >= ARRAY_SIZE(pba_quirk_devs) )
> +            return -E2BIG;
> +
> +        str = parse_pci(str, &seg, &bus, &dev, &func);
> +        if ( !str )
> +            return -EINVAL;
> +
> +        pba_quirk_devs[nr_pba_quirk_devs++].sbdf = PCI_SBDF(seg, bus, dev,
> +                                                            func);
> +        if ( *str == ',' )
> +            str++;
> +        else if ( !*str )
> +            break;
> +        else
> +            return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +custom_param("pba_quirk", parse_pba_quirk);
> +
>  static u16 __read_mostly command_mask;
>  static u16 __read_mostly bridge_ctl_mask;
>  
> @@ -300,6 +332,7 @@ static void check_pdev(const struct pci_dev *pdev)
>  
>  static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>  {
> +    unsigned int i;
>      struct pci_dev *pdev;
>  
>      list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
> @@ -328,6 +361,16 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>          }
>          spin_lock_init(&msix->table_lock);
>          pdev->msix = msix;
> +
> +        for ( i = 0; i < nr_pba_quirk_devs; i++ )
> +        {
> +            if ( pba_quirk_devs[i].sbdf == PCI_SBDF3(pseg->nr, bus, devfn) )
> +            {
> +                pdev->msix->pba_quirk_enabled = true;
> +                printk(XENLOG_WARNING "Enable PBA quirk for %04x:%02x:%02x.%u\n",
> +                       pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
> +            }

You could do this as:

if ( pba_quirk_devs[i].sbdf != PCI_SBDF3(pseg->nr, bus, devfn) )
    continue;

pdev->msix->pba_quirk_enabled = true;
printk(XENLOG_WARNING "Enable PBA quirk for %04x:%02x:%02x.%u\n",
       pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
break;

Which removes one level of indentation (and also exits the loop as
soon as a match is found).

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05  9:34 ` Roger Pau Monné
@ 2018-04-05  9:40   ` George Dunlap
  2018-04-05  9:46     ` Roger Pau Monné
  2018-04-05 11:00   ` Chao Gao
  1 sibling, 1 reply; 13+ messages in thread
From: George Dunlap @ 2018-04-05  9:40 UTC (permalink / raw)
  To: Roger Pau Monné, Chao Gao
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On 04/05/2018 10:34 AM, Roger Pau Monné wrote:
> On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>> ... the same page with other registers which are not relevant to MSI-X. Xen
>> marks pages where PBA resides as read-only. When assigning such devices to
>> guest, device driver writes MSI-X irrelevant registers on those pages would
>> lead to an EPT violation and the guest is destroyed because no handler is
>> registered for those address range. In order to make guest capable to use such
>> kind of devices, trapping very frequent write accesses is not a good idea for
>> it would significantly impact the performance.
>>
>> This patch provides a workaround with caveat. Specifically, an option is
>> introduced to specify a list of devices. For those devices, Xen doesn't
>> control the access right to pages where PBA resides. Hence, guest device
>> driver is able to write those pages and functions well. Note that adding an
>> untrusted device to this option may endanger security of the entire system.
>>
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> ---
>>  docs/misc/xen-command-line.markdown | 10 +++++++++
>>  xen/arch/x86/msi.c                  |  7 ++++--
>>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
>>  xen/include/asm-x86/msi.h           |  1 +
>>  4 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
>> index b353352..e382513 100644
>> --- a/docs/misc/xen-command-line.markdown
>> +++ b/docs/misc/xen-command-line.markdown
>> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
>>  
>>  > Default: `on`
>>  
>> +### pba\_quirk
> 
> pba_write_allowed would be better, pba_quirk is too generic IMO.

'quirk' was I think requested by Jan; and my understanding is that the
word clearly indicates that the behavior in question is a workaround for
hardware which is not compliant with the appropriate specification.  If
you grep the source tree for 'quirk' you'll find a fairly large number.

pba_shared_quirk might be slightly more descriptive.

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05  9:40   ` George Dunlap
@ 2018-04-05  9:46     ` Roger Pau Monné
  2018-04-05  9:52       ` George Dunlap
  0 siblings, 1 reply; 13+ messages in thread
From: Roger Pau Monné @ 2018-04-05  9:46 UTC (permalink / raw)
  To: George Dunlap
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper,
	Chao Gao

On Thu, Apr 05, 2018 at 10:40:37AM +0100, George Dunlap wrote:
> On 04/05/2018 10:34 AM, Roger Pau Monné wrote:
> > On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
> >> ... the same page with other registers which are not relevant to MSI-X. Xen
> >> marks pages where PBA resides as read-only. When assigning such devices to
> >> guest, device driver writes MSI-X irrelevant registers on those pages would
> >> lead to an EPT violation and the guest is destroyed because no handler is
> >> registered for those address range. In order to make guest capable to use such
> >> kind of devices, trapping very frequent write accesses is not a good idea for
> >> it would significantly impact the performance.
> >>
> >> This patch provides a workaround with caveat. Specifically, an option is
> >> introduced to specify a list of devices. For those devices, Xen doesn't
> >> control the access right to pages where PBA resides. Hence, guest device
> >> driver is able to write those pages and functions well. Note that adding an
> >> untrusted device to this option may endanger security of the entire system.
> >>
> >> Signed-off-by: Chao Gao <chao.gao@intel.com>
> >> ---
> >>  docs/misc/xen-command-line.markdown | 10 +++++++++
> >>  xen/arch/x86/msi.c                  |  7 ++++--
> >>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
> >>  xen/include/asm-x86/msi.h           |  1 +
> >>  4 files changed, 59 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
> >> index b353352..e382513 100644
> >> --- a/docs/misc/xen-command-line.markdown
> >> +++ b/docs/misc/xen-command-line.markdown
> >> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
> >>  
> >>  > Default: `on`
> >>  
> >> +### pba\_quirk
> > 
> > pba_write_allowed would be better, pba_quirk is too generic IMO.
> 
> 'quirk' was I think requested by Jan; and my understanding is that the
> word clearly indicates that the behavior in question is a workaround for
> hardware which is not compliant with the appropriate specification.  If
> you grep the source tree for 'quirk' you'll find a fairly large number.
> 
> pba_shared_quirk might be slightly more descriptive.

pba_write_quirk?

I just think it should be slightly more descriptive than pba_quirk in
case Xen has to add further PBA-related quirks in the future.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05  9:46     ` Roger Pau Monné
@ 2018-04-05  9:52       ` George Dunlap
  2018-04-05  9:59         ` Roger Pau Monné
  0 siblings, 1 reply; 13+ messages in thread
From: George Dunlap @ 2018-04-05  9:52 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper,
	Chao Gao

On 04/05/2018 10:46 AM, Roger Pau Monné wrote:
> On Thu, Apr 05, 2018 at 10:40:37AM +0100, George Dunlap wrote:
>> On 04/05/2018 10:34 AM, Roger Pau Monné wrote:
>>> On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>>>> ... the same page with other registers which are not relevant to MSI-X. Xen
>>>> marks pages where PBA resides as read-only. When assigning such devices to
>>>> guest, device driver writes MSI-X irrelevant registers on those pages would
>>>> lead to an EPT violation and the guest is destroyed because no handler is
>>>> registered for those address range. In order to make guest capable to use such
>>>> kind of devices, trapping very frequent write accesses is not a good idea for
>>>> it would significantly impact the performance.
>>>>
>>>> This patch provides a workaround with caveat. Specifically, an option is
>>>> introduced to specify a list of devices. For those devices, Xen doesn't
>>>> control the access right to pages where PBA resides. Hence, guest device
>>>> driver is able to write those pages and functions well. Note that adding an
>>>> untrusted device to this option may endanger security of the entire system.
>>>>
>>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>>> ---
>>>>  docs/misc/xen-command-line.markdown | 10 +++++++++
>>>>  xen/arch/x86/msi.c                  |  7 ++++--
>>>>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
>>>>  xen/include/asm-x86/msi.h           |  1 +
>>>>  4 files changed, 59 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
>>>> index b353352..e382513 100644
>>>> --- a/docs/misc/xen-command-line.markdown
>>>> +++ b/docs/misc/xen-command-line.markdown
>>>> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
>>>>  
>>>>  > Default: `on`
>>>>  
>>>> +### pba\_quirk
>>>
>>> pba_write_allowed would be better, pba_quirk is too generic IMO.
>>
>> 'quirk' was I think requested by Jan; and my understanding is that the
>> word clearly indicates that the behavior in question is a workaround for
>> hardware which is not compliant with the appropriate specification.  If
>> you grep the source tree for 'quirk' you'll find a fairly large number.
>>
>> pba_shared_quirk might be slightly more descriptive.
> 
> pba_write_quirk?
> 
> I just think it should be slightly more descriptive than pba_quirk in
> case Xen has to add further PBA-related quirks in the future.

"shared" tells you something about the quirk itself: The PBA is shared
across multiple devices.  "write" tells you about the work-around:
unsafe writes to the PBA region are allowed.

I think it makes more sense for the name to describe the quirk itself
rather than the work-around.  The description says what the work-around
does and why it's unsafe.

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05  9:52       ` George Dunlap
@ 2018-04-05  9:59         ` Roger Pau Monné
  2018-04-05 10:08           ` George Dunlap
  0 siblings, 1 reply; 13+ messages in thread
From: Roger Pau Monné @ 2018-04-05  9:59 UTC (permalink / raw)
  To: George Dunlap
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper,
	Chao Gao

On Thu, Apr 05, 2018 at 10:52:09AM +0100, George Dunlap wrote:
> On 04/05/2018 10:46 AM, Roger Pau Monné wrote:
> > On Thu, Apr 05, 2018 at 10:40:37AM +0100, George Dunlap wrote:
> >> On 04/05/2018 10:34 AM, Roger Pau Monné wrote:
> >>> On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
> >>>> ... the same page with other registers which are not relevant to MSI-X. Xen
> >>>> marks pages where PBA resides as read-only. When assigning such devices to
> >>>> guest, device driver writes MSI-X irrelevant registers on those pages would
> >>>> lead to an EPT violation and the guest is destroyed because no handler is
> >>>> registered for those address range. In order to make guest capable to use such
> >>>> kind of devices, trapping very frequent write accesses is not a good idea for
> >>>> it would significantly impact the performance.
> >>>>
> >>>> This patch provides a workaround with caveat. Specifically, an option is
> >>>> introduced to specify a list of devices. For those devices, Xen doesn't
> >>>> control the access right to pages where PBA resides. Hence, guest device
> >>>> driver is able to write those pages and functions well. Note that adding an
> >>>> untrusted device to this option may endanger security of the entire system.
> >>>>
> >>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
> >>>> ---
> >>>>  docs/misc/xen-command-line.markdown | 10 +++++++++
> >>>>  xen/arch/x86/msi.c                  |  7 ++++--
> >>>>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
> >>>>  xen/include/asm-x86/msi.h           |  1 +
> >>>>  4 files changed, 59 insertions(+), 4 deletions(-)
> >>>>
> >>>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
> >>>> index b353352..e382513 100644
> >>>> --- a/docs/misc/xen-command-line.markdown
> >>>> +++ b/docs/misc/xen-command-line.markdown
> >>>> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
> >>>>  
> >>>>  > Default: `on`
> >>>>  
> >>>> +### pba\_quirk
> >>>
> >>> pba_write_allowed would be better, pba_quirk is too generic IMO.
> >>
> >> 'quirk' was I think requested by Jan; and my understanding is that the
> >> word clearly indicates that the behavior in question is a workaround for
> >> hardware which is not compliant with the appropriate specification.  If
> >> you grep the source tree for 'quirk' you'll find a fairly large number.
> >>
> >> pba_shared_quirk might be slightly more descriptive.
> > 
> > pba_write_quirk?
> > 
> > I just think it should be slightly more descriptive than pba_quirk in
> > case Xen has to add further PBA-related quirks in the future.
> 
> "shared" tells you something about the quirk itself: The PBA is shared
> across multiple devices.  "write" tells you about the work-around:
> unsafe writes to the PBA region are allowed.

I don't think the PBA page is shared with multiple devices in any
case. The problem here is that the PBA page contains other registers
(from the same device as the PBA) that must be RW instead of RO.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05  9:59         ` Roger Pau Monné
@ 2018-04-05 10:08           ` George Dunlap
  2018-04-05 11:48             ` Chao Gao
  0 siblings, 1 reply; 13+ messages in thread
From: George Dunlap @ 2018-04-05 10:08 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper,
	Chao Gao

On 04/05/2018 10:59 AM, Roger Pau Monné wrote:
> On Thu, Apr 05, 2018 at 10:52:09AM +0100, George Dunlap wrote:
>> On 04/05/2018 10:46 AM, Roger Pau Monné wrote:
>>> On Thu, Apr 05, 2018 at 10:40:37AM +0100, George Dunlap wrote:
>>>> On 04/05/2018 10:34 AM, Roger Pau Monné wrote:
>>>>> On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>>>>>> ... the same page with other registers which are not relevant to MSI-X. Xen
>>>>>> marks pages where PBA resides as read-only. When assigning such devices to
>>>>>> guest, device driver writes MSI-X irrelevant registers on those pages would
>>>>>> lead to an EPT violation and the guest is destroyed because no handler is
>>>>>> registered for those address range. In order to make guest capable to use such
>>>>>> kind of devices, trapping very frequent write accesses is not a good idea for
>>>>>> it would significantly impact the performance.
>>>>>>
>>>>>> This patch provides a workaround with caveat. Specifically, an option is
>>>>>> introduced to specify a list of devices. For those devices, Xen doesn't
>>>>>> control the access right to pages where PBA resides. Hence, guest device
>>>>>> driver is able to write those pages and functions well. Note that adding an
>>>>>> untrusted device to this option may endanger security of the entire system.
>>>>>>
>>>>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>>>>> ---
>>>>>>  docs/misc/xen-command-line.markdown | 10 +++++++++
>>>>>>  xen/arch/x86/msi.c                  |  7 ++++--
>>>>>>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
>>>>>>  xen/include/asm-x86/msi.h           |  1 +
>>>>>>  4 files changed, 59 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
>>>>>> index b353352..e382513 100644
>>>>>> --- a/docs/misc/xen-command-line.markdown
>>>>>> +++ b/docs/misc/xen-command-line.markdown
>>>>>> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
>>>>>>  
>>>>>>  > Default: `on`
>>>>>>  
>>>>>> +### pba\_quirk
>>>>>
>>>>> pba_write_allowed would be better, pba_quirk is too generic IMO.
>>>>
>>>> 'quirk' was I think requested by Jan; and my understanding is that the
>>>> word clearly indicates that the behavior in question is a workaround for
>>>> hardware which is not compliant with the appropriate specification.  If
>>>> you grep the source tree for 'quirk' you'll find a fairly large number.
>>>>
>>>> pba_shared_quirk might be slightly more descriptive.
>>>
>>> pba_write_quirk?
>>>
>>> I just think it should be slightly more descriptive than pba_quirk in
>>> case Xen has to add further PBA-related quirks in the future.
>>
>> "shared" tells you something about the quirk itself: The PBA is shared
>> across multiple devices.  "write" tells you about the work-around:
>> unsafe writes to the PBA region are allowed.
> 
> I don't think the PBA page is shared with multiple devices in any
> case. The problem here is that the PBA page contains other registers
> (from the same device as the PBA) that must be RW instead of RO.

Yes, I realized that after I'd clicked 'send'.  "Shared" still makes
sense though: the pba shares a page with registers which must be kept RO.

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05  9:34 ` Roger Pau Monné
  2018-04-05  9:40   ` George Dunlap
@ 2018-04-05 11:00   ` Chao Gao
  2018-04-05 11:25     ` Roger Pau Monné
  1 sibling, 1 reply; 13+ messages in thread
From: Chao Gao @ 2018-04-05 11:00 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On Thu, Apr 05, 2018 at 10:34:39AM +0100, Roger Pau Monné wrote:
>On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>> ... the same page with other registers which are not relevant to MSI-X. Xen
>> marks pages where PBA resides as read-only. When assigning such devices to
>> guest, device driver writes MSI-X irrelevant registers on those pages would
>> lead to an EPT violation and the guest is destroyed because no handler is
>> registered for those address range. In order to make guest capable to use such
>> kind of devices, trapping very frequent write accesses is not a good idea for
>> it would significantly impact the performance.
>> 
>> This patch provides a workaround with caveat. Specifically, an option is
>> introduced to specify a list of devices. For those devices, Xen doesn't
>> control the access right to pages where PBA resides. Hence, guest device
>> driver is able to write those pages and functions well. Note that adding an
>> untrusted device to this option may endanger security of the entire system.
>> 
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> ---
>>  docs/misc/xen-command-line.markdown | 10 +++++++++
>>  xen/arch/x86/msi.c                  |  7 ++++--
>>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
>>  xen/include/asm-x86/msi.h           |  1 +
>>  4 files changed, 59 insertions(+), 4 deletions(-)
>> 
>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
>> index b353352..e382513 100644
>> --- a/docs/misc/xen-command-line.markdown
>> +++ b/docs/misc/xen-command-line.markdown
>> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
>>  
>>  > Default: `on`
>>  
>> +### pba\_quirk
>
>pba_write_allowed would be better, pba_quirk is too generic IMO.
>
>> +> `= List of [<seg>:]<bus>:<device>.<function>
>> +
>> +Specify a list of SBDF of devices. When assigning devices in this list to
>> +guest, reading or writing the page where MSI-X PBA resides are allowed.
>> +This option provides a workaround for nonstandard PCI devices whose
>> +MSI-X PBA shares the same 4K-byte page with other registers. Note that
>> +adding an untrusted device to this option would undermine security of
>> +the entire system.
>> +
>>  ### pci
>>  > `= {no-}serr | {no-}perr`
>>  
>> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
>> index 5567990..2abf2cf 100644
>> --- a/xen/arch/x86/msi.c
>> +++ b/xen/arch/x86/msi.c
>> @@ -992,7 +992,9 @@ static int msix_capability_init(struct pci_dev *dev,
>>          if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
>>                                  msix->table.last) )
>>              WARN();
>> -        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
>> +
>> +        if ( !msix->pba_quirk_enabled &&
>> +             rangeset_add_range(mmio_ro_ranges, msix->pba.first,
>>                                  msix->pba.last) )
>>              WARN();
>
>This will work fine as long as the PBA is not in the same page as the
>MSI-X table. In such case you will also need changes to QEMU (see
>pci_msix_write), so that writes to the memory in the same page as the
>MSI-X/PBA tables are forwarded to the underlying hardware.
>
>You should add least add something like:
>
>if ( msix->pba_quirk_enabled &&
>     msix->table.first <= msix->pba.last &&
>     msix->pba.first <= msix->table.last )
>{
>    printk("PBA write not allowed to dev %04x:%02x:%02x.%u due to MSI-X table overlap\n");
>    return -ENXIO;
>}
>
>Or similar if the QEMU side is not fixed.
>
>Note that in order to fix the QEMU side you would probably have to add
>a flag to xl 'pci' config option and pass it to both QEMU and Xen.

Thanks for your comments.

First of all, I don't intend to also support devices which has MSI-X
table, MSI-X PBA and other MSI-X irrelevant registers in the same page.
Because as you said, it cleary violates MSI-X spec. IMO, we can extend
the workaround when we found such a device.

I also had the same concern with yours. But after careful thinking, I
found it wouldn't be a problem. If MSI-X table resides the same pages
with MSI-X PBA, we will mark the pages as RO when handling MSI-X table.
As a consequence, guest isn't able to write MSI-X table directly in this
case. Hence, it won't affect MSI-X table emulation and furthermore the
quirk won't override the decision, marking the page RO, made for other
reasons.
>
>>  
>> @@ -1139,7 +1141,8 @@ static void _pci_cleanup_msix(struct arch_msix *msix)
>>          if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
>>                                     msix->table.last) )
>>              WARN();
>> -        if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
>> +        if ( !msix->pba_quirk_enabled &&
>> +             rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
>>                                     msix->pba.last) )
>>              WARN();
>>      }
>> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
>> index 1db69d5..cd765ef 100644
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -184,6 +184,38 @@ static int __init parse_phantom_dev(const char *str)
>>  }
>>  custom_param("pci-phantom", parse_phantom_dev);
>>  
>> +static struct pba_quirk_dev {
>> +    uint32_t sbdf;
>> +} pba_quirk_devs[8];
>
>We have a sbdf type now, see 514f58.
>
>Also, I would prefer that you use a list here. I know it's not likely
>to have a huge number of devices in the system that require this
>quirk, but I also see no reason to limit this to 8 (or any other
>arbitrary value).

Ok. I will use a list and the new sbdf type.

>
>> +static unsigned int nr_pba_quirk_devs;
>> +
>> +static int __init parse_pba_quirk(const char *str)
>> +{
>> +    unsigned int seg, bus, dev, func;
>> +
>> +    for ( ; ; )
>> +    {
>> +        if ( nr_pba_quirk_devs >= ARRAY_SIZE(pba_quirk_devs) )
>> +            return -E2BIG;
>> +
>> +        str = parse_pci(str, &seg, &bus, &dev, &func);
>> +        if ( !str )
>> +            return -EINVAL;
>> +
>> +        pba_quirk_devs[nr_pba_quirk_devs++].sbdf = PCI_SBDF(seg, bus, dev,
>> +                                                            func);
>> +        if ( *str == ',' )
>> +            str++;
>> +        else if ( !*str )
>> +            break;
>> +        else
>> +            return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +custom_param("pba_quirk", parse_pba_quirk);
>> +
>>  static u16 __read_mostly command_mask;
>>  static u16 __read_mostly bridge_ctl_mask;
>>  
>> @@ -300,6 +332,7 @@ static void check_pdev(const struct pci_dev *pdev)
>>  
>>  static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>>  {
>> +    unsigned int i;
>>      struct pci_dev *pdev;
>>  
>>      list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
>> @@ -328,6 +361,16 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>>          }
>>          spin_lock_init(&msix->table_lock);
>>          pdev->msix = msix;
>> +
>> +        for ( i = 0; i < nr_pba_quirk_devs; i++ )
>> +        {
>> +            if ( pba_quirk_devs[i].sbdf == PCI_SBDF3(pseg->nr, bus, devfn) )
>> +            {
>> +                pdev->msix->pba_quirk_enabled = true;
>> +                printk(XENLOG_WARNING "Enable PBA quirk for %04x:%02x:%02x.%u\n",
>> +                       pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
>> +            }
>
>You could do this as:
>
>if ( pba_quirk_devs[i].sbdf != PCI_SBDF3(pseg->nr, bus, devfn) )
>    continue;
>
>pdev->msix->pba_quirk_enabled = true;
>printk(XENLOG_WARNING "Enable PBA quirk for %04x:%02x:%02x.%u\n",
>       pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
>break;
>
>Which removes one level of indentation (and also exits the loop as
>soon as a match is found).

Very good suggestion.

Thanks
Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05 11:00   ` Chao Gao
@ 2018-04-05 11:25     ` Roger Pau Monné
  2018-04-05 11:43       ` Chao Gao
  0 siblings, 1 reply; 13+ messages in thread
From: Roger Pau Monné @ 2018-04-05 11:25 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On Thu, Apr 05, 2018 at 07:00:41PM +0800, Chao Gao wrote:
> On Thu, Apr 05, 2018 at 10:34:39AM +0100, Roger Pau Monné wrote:
> >On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
> >> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
> >> index 5567990..2abf2cf 100644
> >> --- a/xen/arch/x86/msi.c
> >> +++ b/xen/arch/x86/msi.c
> >> @@ -992,7 +992,9 @@ static int msix_capability_init(struct pci_dev *dev,
> >>          if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
> >>                                  msix->table.last) )
> >>              WARN();
> >> -        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
> >> +
> >> +        if ( !msix->pba_quirk_enabled &&
> >> +             rangeset_add_range(mmio_ro_ranges, msix->pba.first,
> >>                                  msix->pba.last) )
> >>              WARN();
> >
> >This will work fine as long as the PBA is not in the same page as the
> >MSI-X table. In such case you will also need changes to QEMU (see
> >pci_msix_write), so that writes to the memory in the same page as the
> >MSI-X/PBA tables are forwarded to the underlying hardware.
> >
> >You should add least add something like:
> >
> >if ( msix->pba_quirk_enabled &&
> >     msix->table.first <= msix->pba.last &&
> >     msix->pba.first <= msix->table.last )
> >{
> >    printk("PBA write not allowed to dev %04x:%02x:%02x.%u due to MSI-X table overlap\n");
> >    return -ENXIO;
> >}
> >
> >Or similar if the QEMU side is not fixed.
> >
> >Note that in order to fix the QEMU side you would probably have to add
> >a flag to xl 'pci' config option and pass it to both QEMU and Xen.
> 
> Thanks for your comments.
> 
> First of all, I don't intend to also support devices which has MSI-X
> table, MSI-X PBA and other MSI-X irrelevant registers in the same page.
> Because as you said, it cleary violates MSI-X spec. IMO, we can extend
> the workaround when we found such a device.
>
> I also had the same concern with yours. But after careful thinking, I
> found it wouldn't be a problem. If MSI-X table resides the same pages
> with MSI-X PBA, we will mark the pages as RO when handling MSI-X table.
> As a consequence, guest isn't able to write MSI-X table directly in this
> case. Hence, it won't affect MSI-X table emulation and furthermore the
> quirk won't override the decision, marking the page RO, made for other
> reasons.

My suggestion is not because it would be dangerous from a security
PoV, it's simply because the quirk won't be applied, and hence we need
to notify the user that the desired quirk has not been applied.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05 11:25     ` Roger Pau Monné
@ 2018-04-05 11:43       ` Chao Gao
  0 siblings, 0 replies; 13+ messages in thread
From: Chao Gao @ 2018-04-05 11:43 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper

On Thu, Apr 05, 2018 at 12:25:26PM +0100, Roger Pau Monné wrote:
>On Thu, Apr 05, 2018 at 07:00:41PM +0800, Chao Gao wrote:
>> On Thu, Apr 05, 2018 at 10:34:39AM +0100, Roger Pau Monné wrote:
>> >On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>> >> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
>> >> index 5567990..2abf2cf 100644
>> >> --- a/xen/arch/x86/msi.c
>> >> +++ b/xen/arch/x86/msi.c
>> >> @@ -992,7 +992,9 @@ static int msix_capability_init(struct pci_dev *dev,
>> >>          if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
>> >>                                  msix->table.last) )
>> >>              WARN();
>> >> -        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
>> >> +
>> >> +        if ( !msix->pba_quirk_enabled &&
>> >> +             rangeset_add_range(mmio_ro_ranges, msix->pba.first,
>> >>                                  msix->pba.last) )
>> >>              WARN();
>> >
>> >This will work fine as long as the PBA is not in the same page as the
>> >MSI-X table. In such case you will also need changes to QEMU (see
>> >pci_msix_write), so that writes to the memory in the same page as the
>> >MSI-X/PBA tables are forwarded to the underlying hardware.
>> >
>> >You should add least add something like:
>> >
>> >if ( msix->pba_quirk_enabled &&
>> >     msix->table.first <= msix->pba.last &&
>> >     msix->pba.first <= msix->table.last )
>> >{
>> >    printk("PBA write not allowed to dev %04x:%02x:%02x.%u due to MSI-X table overlap\n");
>> >    return -ENXIO;
>> >}
>> >
>> >Or similar if the QEMU side is not fixed.
>> >
>> >Note that in order to fix the QEMU side you would probably have to add
>> >a flag to xl 'pci' config option and pass it to both QEMU and Xen.
>> 
>> Thanks for your comments.
>> 
>> First of all, I don't intend to also support devices which has MSI-X
>> table, MSI-X PBA and other MSI-X irrelevant registers in the same page.
>> Because as you said, it cleary violates MSI-X spec. IMO, we can extend
>> the workaround when we found such a device.
>>
>> I also had the same concern with yours. But after careful thinking, I
>> found it wouldn't be a problem. If MSI-X table resides the same pages
>> with MSI-X PBA, we will mark the pages as RO when handling MSI-X table.
>> As a consequence, guest isn't able to write MSI-X table directly in this
>> case. Hence, it won't affect MSI-X table emulation and furthermore the
>> quirk won't override the decision, marking the page RO, made for other
>> reasons.
>
>My suggestion is not because it would be dangerous from a security
>PoV, it's simply because the quirk won't be applied, and hence we need
>to notify the user that the desired quirk has not been applied.

Got it. It is reasonable.

Thanks
Chao


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares
  2018-04-05 10:08           ` George Dunlap
@ 2018-04-05 11:48             ` Chao Gao
  0 siblings, 0 replies; 13+ messages in thread
From: Chao Gao @ 2018-04-05 11:48 UTC (permalink / raw)
  To: George Dunlap
  Cc: Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, xen-devel, Julien Grall, Jan Beulich, Andrew Cooper,
	Roger Pau Monné

On Thu, Apr 05, 2018 at 11:08:59AM +0100, George Dunlap wrote:
>On 04/05/2018 10:59 AM, Roger Pau Monné wrote:
>> On Thu, Apr 05, 2018 at 10:52:09AM +0100, George Dunlap wrote:
>>> On 04/05/2018 10:46 AM, Roger Pau Monné wrote:
>>>> On Thu, Apr 05, 2018 at 10:40:37AM +0100, George Dunlap wrote:
>>>>> On 04/05/2018 10:34 AM, Roger Pau Monné wrote:
>>>>>> On Wed, Apr 04, 2018 at 11:29:39PM +0800, Chao Gao wrote:
>>>>>>> ... the same page with other registers which are not relevant to MSI-X. Xen
>>>>>>> marks pages where PBA resides as read-only. When assigning such devices to
>>>>>>> guest, device driver writes MSI-X irrelevant registers on those pages would
>>>>>>> lead to an EPT violation and the guest is destroyed because no handler is
>>>>>>> registered for those address range. In order to make guest capable to use such
>>>>>>> kind of devices, trapping very frequent write accesses is not a good idea for
>>>>>>> it would significantly impact the performance.
>>>>>>>
>>>>>>> This patch provides a workaround with caveat. Specifically, an option is
>>>>>>> introduced to specify a list of devices. For those devices, Xen doesn't
>>>>>>> control the access right to pages where PBA resides. Hence, guest device
>>>>>>> driver is able to write those pages and functions well. Note that adding an
>>>>>>> untrusted device to this option may endanger security of the entire system.
>>>>>>>
>>>>>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>>>>>> ---
>>>>>>>  docs/misc/xen-command-line.markdown | 10 +++++++++
>>>>>>>  xen/arch/x86/msi.c                  |  7 ++++--
>>>>>>>  xen/drivers/passthrough/pci.c       | 45 +++++++++++++++++++++++++++++++++++--
>>>>>>>  xen/include/asm-x86/msi.h           |  1 +
>>>>>>>  4 files changed, 59 insertions(+), 4 deletions(-)
>>>>>>>
>>>>>>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
>>>>>>> index b353352..e382513 100644
>>>>>>> --- a/docs/misc/xen-command-line.markdown
>>>>>>> +++ b/docs/misc/xen-command-line.markdown
>>>>>>> @@ -1423,6 +1423,16 @@ Defaults to booting secondary processors.
>>>>>>>  
>>>>>>>  > Default: `on`
>>>>>>>  
>>>>>>> +### pba\_quirk
>>>>>>
>>>>>> pba_write_allowed would be better, pba_quirk is too generic IMO.
>>>>>
>>>>> 'quirk' was I think requested by Jan; and my understanding is that the
>>>>> word clearly indicates that the behavior in question is a workaround for
>>>>> hardware which is not compliant with the appropriate specification.  If
>>>>> you grep the source tree for 'quirk' you'll find a fairly large number.
>>>>>
>>>>> pba_shared_quirk might be slightly more descriptive.
>>>>
>>>> pba_write_quirk?
>>>>
>>>> I just think it should be slightly more descriptive than pba_quirk in
>>>> case Xen has to add further PBA-related quirks in the future.
>>>
>>> "shared" tells you something about the quirk itself: The PBA is shared
>>> across multiple devices.  "write" tells you about the work-around:
>>> unsafe writes to the PBA region are allowed.
>> 
>> I don't think the PBA page is shared with multiple devices in any
>> case. The problem here is that the PBA page contains other registers
>> (from the same device as the PBA) that must be RW instead of RO.
>
>Yes, I realized that after I'd clicked 'send'.  "Shared" still makes
>sense though: the pba shares a page with registers which must be kept RO.

pba_shared_quirk is fine with me. I will use it.

Thanks
Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-04-05 11:48 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-04 15:29 [PATCH] pci: a workaround for nonstandard PCI devices whose PBA shares Chao Gao
2018-04-04 15:45 ` Roger Pau Monné
2018-04-04 16:20   ` Chao Gao
2018-04-05  9:34 ` Roger Pau Monné
2018-04-05  9:40   ` George Dunlap
2018-04-05  9:46     ` Roger Pau Monné
2018-04-05  9:52       ` George Dunlap
2018-04-05  9:59         ` Roger Pau Monné
2018-04-05 10:08           ` George Dunlap
2018-04-05 11:48             ` Chao Gao
2018-04-05 11:00   ` Chao Gao
2018-04-05 11:25     ` Roger Pau Monné
2018-04-05 11:43       ` Chao Gao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.