All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] (mainly) IOMMU hook adjustments
@ 2022-01-27 14:46 Jan Beulich
  2022-01-27 14:47 ` [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Jan Beulich @ 2022-01-27 14:46 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Andrew Cooper

Besides the altcall conversion, some further adjustments appeared desirable
to do while touching that area.

1: IOMMU/x86: switch to alternatives-call patching in further instances
2: VT-d / x86: re-arrange cache syncing
3: VT-d: replace flush_all_cache()
4: IOMMU/PCI: propagate get_device_group_id() failure

I'm sending this v2 after waiting for quite some time on continuing some
aspects which were discussed on the v1 thread, but no luck. Also there's
a new 4th patch now, as kind of a result from one part of that discussion.

Jan



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2022-01-27 14:46 [PATCH v2 0/4] (mainly) IOMMU hook adjustments Jan Beulich
@ 2022-01-27 14:47 ` Jan Beulich
  2022-01-28  9:28   ` Durrant, Paul
  2022-01-28 10:40   ` Rahul Singh
  2022-01-27 14:48 ` [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Jan Beulich @ 2022-01-27 14:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Andrew Cooper

This is, once again, to limit the number of indirect calls as much as
possible. The only hook invocation which isn't sensible to convert is
setup(). And of course Arm-only use sites are left alone as well.

Note regarding the introduction / use of local variables in pci.c:
struct pci_dev's involved fields are const. This const propagates, via
typeof(), to the local helper variables in the altcall macros. These
helper variables are, however, used as outputs (and hence can't be
const). In iommu_get_device_group() make use of the new local variables
to also simplify some adjacent code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -198,7 +198,7 @@ int iommu_domain_init(struct domain *d,
         return ret;
 
     hd->platform_ops = iommu_get_ops();
-    ret = hd->platform_ops->init(d);
+    ret = iommu_call(hd->platform_ops, init, d);
     if ( ret || is_system_domain(d) )
         return ret;
 
@@ -233,7 +233,7 @@ void __hwdom_init iommu_hwdom_init(struc
 
     register_keyhandler('o', &iommu_dump_page_tables, "dump iommu page tables", 0);
 
-    hd->platform_ops->hwdom_init(d);
+    iommu_vcall(hd->platform_ops, hwdom_init, d);
 }
 
 static void iommu_teardown(struct domain *d)
@@ -576,7 +576,7 @@ int iommu_get_reserved_device_memory(iom
     if ( !ops->get_reserved_device_memory )
         return 0;
 
-    return ops->get_reserved_device_memory(func, ctxt);
+    return iommu_call(ops, get_reserved_device_memory, func, ctxt);
 }
 
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
@@ -603,7 +603,7 @@ static void iommu_dump_page_tables(unsig
             continue;
         }
 
-        dom_iommu(d)->platform_ops->dump_page_tables(d);
+        iommu_vcall(dom_iommu(d)->platform_ops, dump_page_tables, d);
     }
 
     rcu_read_unlock(&domlist_read_lock);
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -861,15 +861,15 @@ static int deassign_device(struct domain
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             break;
-        ret = hd->platform_ops->reassign_device(d, target, devfn,
-                                                pci_to_dev(pdev));
+        ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
+                         pci_to_dev(pdev));
         if ( ret )
             goto out;
     }
 
     devfn = pdev->devfn;
-    ret = hd->platform_ops->reassign_device(d, target, devfn,
-                                            pci_to_dev(pdev));
+    ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
+                     pci_to_dev(pdev));
     if ( ret )
         goto out;
 
@@ -1300,7 +1300,7 @@ static int iommu_add_device(struct pci_d
 {
     const struct domain_iommu *hd;
     int rc;
-    u8 devfn;
+    unsigned int devfn = pdev->devfn;
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -1311,16 +1311,16 @@ static int iommu_add_device(struct pci_d
     if ( !is_iommu_enabled(pdev->domain) )
         return 0;
 
-    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
+    rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
     if ( rc || !pdev->phantom_stride )
         return rc;
 
-    for ( devfn = pdev->devfn ; ; )
+    for ( ; ; )
     {
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             return 0;
-        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
+        rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
         if ( rc )
             printk(XENLOG_WARNING "IOMMU: add %pp failed (%d)\n",
                    &pdev->sbdf, rc);
@@ -1341,7 +1341,7 @@ static int iommu_enable_device(struct pc
          !hd->platform_ops->enable_device )
         return 0;
 
-    return hd->platform_ops->enable_device(pci_to_dev(pdev));
+    return iommu_call(hd->platform_ops, enable_device, pci_to_dev(pdev));
 }
 
 static int iommu_remove_device(struct pci_dev *pdev)
@@ -1363,7 +1363,8 @@ static int iommu_remove_device(struct pc
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             break;
-        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
+        rc = iommu_call(hd->platform_ops, remove_device, devfn,
+                        pci_to_dev(pdev));
         if ( !rc )
             continue;
 
@@ -1371,7 +1372,9 @@ static int iommu_remove_device(struct pc
         return rc;
     }
 
-    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
+    devfn = pdev->devfn;
+
+    return iommu_call(hd->platform_ops, remove_device, devfn, pci_to_dev(pdev));
 }
 
 static int device_assigned(u16 seg, u8 bus, u8 devfn)
@@ -1421,7 +1424,8 @@ static int assign_device(struct domain *
 
     pdev->fault.count = 0;
 
-    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
+    if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
+                          pci_to_dev(pdev), flag)) )
         goto done;
 
     for ( ; pdev->phantom_stride; rc = 0 )
@@ -1429,7 +1433,8 @@ static int assign_device(struct domain *
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             break;
-        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
+        rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
+                        pci_to_dev(pdev), flag);
     }
 
  done:
@@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
     if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
         return 0;
 
-    group_id = ops->get_device_group_id(seg, bus, devfn);
+    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
 
     pcidevs_lock();
     for_each_pdev( d, pdev )
     {
-        if ( (pdev->seg != seg) ||
-             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
+        unsigned int b = pdev->bus;
+        unsigned int df = pdev->devfn;
+
+        if ( (pdev->seg != seg) || ((b == bus) && (df == devfn)) )
             continue;
 
-        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
+        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (b << 8) | df) )
             continue;
 
-        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
+        sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
         if ( (sdev_id == group_id) && (i < max_sdevs) )
         {
-            bdf = 0;
-            bdf |= (pdev->bus & 0xff) << 16;
-            bdf |= (pdev->devfn & 0xff) << 8;
+            bdf = (b << 16) | (df << 8);
 
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
             {
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -145,7 +145,7 @@ unsigned int iommu_read_apic_from_ire(un
 int __init iommu_setup_hpet_msi(struct msi_desc *msi)
 {
     const struct iommu_ops *ops = iommu_get_ops();
-    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
+    return ops->setup_hpet_msi ? iommu_call(ops, setup_hpet_msi, msi) : -ENODEV;
 }
 
 void __hwdom_init arch_iommu_check_autotranslated_hwdom(struct domain *d)
@@ -406,7 +406,7 @@ int iommu_free_pgtables(struct domain *d
      * Pages will be moved to the free list below. So we want to
      * clear the root page-table to avoid any potential use after-free.
      */
-    hd->platform_ops->clear_root_pgtable(d);
+    iommu_vcall(hd->platform_ops, clear_root_pgtable, d);
 
     while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
     {



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing
  2022-01-27 14:46 [PATCH v2 0/4] (mainly) IOMMU hook adjustments Jan Beulich
  2022-01-27 14:47 ` [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
@ 2022-01-27 14:48 ` Jan Beulich
  2022-01-28  9:32   ` Durrant, Paul
  2022-02-18  5:34   ` Tian, Kevin
  2022-01-27 14:49 ` [PATCH v2 3/4] VT-d: replace flush_all_cache() Jan Beulich
  2022-01-27 14:49 ` [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure Jan Beulich
  3 siblings, 2 replies; 15+ messages in thread
From: Jan Beulich @ 2022-01-27 14:48 UTC (permalink / raw)
  To: xen-devel
  Cc: Paul Durrant, Kevin Tian, Andrew Cooper, Wei Liu, Roger Pau Monné

The actual function should always have lived in core x86 code; move it
there, replacing get_cache_line_size() by readily available (except very
early during boot; see the code comment) data. Also rename the function.

Drop the respective IOMMU hook, (re)introducing a respective boolean
instead. Replace a true and an almost open-coded instance of
iommu_sync_cache().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Placing the function next to flush_area_local() exposes a curious
asymmetry between the SFENCE placements: sync_cache() has it after the
flush, while flush_area_local() has it before it. I think the latter one
is misplaced.
---
v2: Rename sync_cache() to cache_writeback().

--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -11,6 +11,7 @@
 #include <xen/sched.h>
 #include <xen/smp.h>
 #include <xen/softirq.h>
+#include <asm/cache.h>
 #include <asm/flushtlb.h>
 #include <asm/invpcid.h>
 #include <asm/nops.h>
@@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
     return flags;
 }
 
+void cache_writeback(const void *addr, unsigned int size)
+{
+    /*
+     * This function may be called before current_cpu_data is established.
+     * Hence a fallback is needed to prevent the loop below becoming infinite.
+     */
+    unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
+    const void *end = addr + size;
+
+    addr -= (unsigned long)addr & (clflush_size - 1);
+    for ( ; addr < end; addr += clflush_size )
+    {
+/*
+ * The arguments to a macro must not include preprocessor directives. Doing so
+ * results in undefined behavior, so we have to create some defines here in
+ * order to avoid it.
+ */
+#if defined(HAVE_AS_CLWB)
+# define CLWB_ENCODING "clwb %[p]"
+#elif defined(HAVE_AS_XSAVEOPT)
+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
+#else
+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
+#endif
+
+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
+# define INPUT BASE_INPUT
+#else
+# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
+#endif
+        /*
+         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
+         * + prefix than a clflush + nop, and hence the prefix is added instead
+         * of letting the alternative framework fill the gap by appending nops.
+         */
+        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
+                         "data16 clflush %[p]", /* clflushopt */
+                         X86_FEATURE_CLFLUSHOPT,
+                         CLWB_ENCODING,
+                         X86_FEATURE_CLWB, /* no outputs */,
+                         INPUT(addr));
+#undef INPUT
+#undef BASE_INPUT
+#undef CLWB_ENCODING
+    }
+
+    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
+                      "sfence", X86_FEATURE_CLWB);
+}
+
 unsigned int guest_flush_tlb_flags(const struct domain *d)
 {
     bool shadow = paging_mode_shadow(d);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -240,54 +240,6 @@ domid_t did_to_domain_id(const struct vt
     return iommu->domid_map[did];
 }
 
-static void sync_cache(const void *addr, unsigned int size)
-{
-    static unsigned long clflush_size = 0;
-    const void *end = addr + size;
-
-    if ( clflush_size == 0 )
-        clflush_size = get_cache_line_size();
-
-    addr -= (unsigned long)addr & (clflush_size - 1);
-    for ( ; addr < end; addr += clflush_size )
-/*
- * The arguments to a macro must not include preprocessor directives. Doing so
- * results in undefined behavior, so we have to create some defines here in
- * order to avoid it.
- */
-#if defined(HAVE_AS_CLWB)
-# define CLWB_ENCODING "clwb %[p]"
-#elif defined(HAVE_AS_XSAVEOPT)
-# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
-#else
-# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
-#endif
-
-#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
-#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
-# define INPUT BASE_INPUT
-#else
-# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
-#endif
-        /*
-         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
-         * + prefix than a clflush + nop, and hence the prefix is added instead
-         * of letting the alternative framework fill the gap by appending nops.
-         */
-        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
-                         "data16 clflush %[p]", /* clflushopt */
-                         X86_FEATURE_CLFLUSHOPT,
-                         CLWB_ENCODING,
-                         X86_FEATURE_CLWB, /* no outputs */,
-                         INPUT(addr));
-#undef INPUT
-#undef BASE_INPUT
-#undef CLWB_ENCODING
-
-    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
-                      "sfence", X86_FEATURE_CLWB);
-}
-
 /* Allocate page table, return its machine address */
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
 {
@@ -306,8 +258,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
 
         clear_page(vaddr);
 
-        if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
-            sync_cache(vaddr, PAGE_SIZE);
+        iommu_sync_cache(vaddr, PAGE_SIZE);
         unmap_domain_page(vaddr);
         cur_pg++;
     }
@@ -1327,7 +1278,7 @@ int __init iommu_alloc(struct acpi_drhd_
     iommu->nr_pt_levels = agaw_to_level(agaw);
 
     if ( !ecap_coherent(iommu->ecap) )
-        vtd_ops.sync_cache = sync_cache;
+        iommu_non_coherent = true;
 
     nr_dom = cap_ndoms(iommu->cap);
 
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -28,6 +28,7 @@
 
 const struct iommu_init_ops *__initdata iommu_init_ops;
 struct iommu_ops __read_mostly iommu_ops;
+bool __read_mostly iommu_non_coherent;
 
 enum iommu_intremap __read_mostly iommu_intremap = iommu_intremap_full;
 
@@ -438,8 +439,7 @@ struct page_info *iommu_alloc_pgtable(st
     p = __map_domain_page(pg);
     clear_page(p);
 
-    if ( hd->platform_ops->sync_cache )
-        iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
+    iommu_sync_cache(p, PAGE_SIZE);
 
     unmap_domain_page(p);
 
--- a/xen/arch/x86/include/asm/cache.h
+++ b/xen/arch/x86/include/asm/cache.h
@@ -11,4 +11,10 @@
 
 #define __read_mostly __section(".data.read_mostly")
 
+#ifndef __ASSEMBLY__
+
+void cache_writeback(const void *addr, unsigned int size);
+
+#endif
+
 #endif
--- a/xen/arch/x86/include/asm/iommu.h
+++ b/xen/arch/x86/include/asm/iommu.h
@@ -19,6 +19,7 @@
 #include <xen/mem_access.h>
 #include <xen/spinlock.h>
 #include <asm/apicdef.h>
+#include <asm/cache.h>
 #include <asm/processor.h>
 #include <asm/hvm/vmx/vmcs.h>
 
@@ -134,12 +135,13 @@ extern bool untrusted_msi;
 int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
                    const uint8_t gvec);
 
-#define iommu_sync_cache(addr, size) ({                 \
-    const struct iommu_ops *ops = iommu_get_ops();      \
-                                                        \
-    if ( ops->sync_cache )                              \
-        iommu_vcall(ops, sync_cache, addr, size);       \
-})
+extern bool iommu_non_coherent;
+
+static inline void iommu_sync_cache(const void *addr, unsigned int size)
+{
+    if ( iommu_non_coherent )
+        cache_writeback(addr, size);
+}
 
 int __must_check iommu_free_pgtables(struct domain *d);
 struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -268,7 +268,6 @@ struct iommu_ops {
     int (*setup_hpet_msi)(struct msi_desc *);
 
     int (*adjust_irq_affinities)(void);
-    void (*sync_cache)(const void *addr, unsigned int size);
     void (*clear_root_pgtable)(struct domain *d);
     int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
 #endif /* CONFIG_X86 */
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -78,7 +78,6 @@ int __must_check qinval_device_iotlb_syn
                                           struct pci_dev *pdev,
                                           u16 did, u16 size, u64 addr);
 
-unsigned int get_cache_line_size(void);
 void flush_all_cache(void);
 
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *v
     unmap_domain_page(va);
 }
 
-unsigned int get_cache_line_size(void)
-{
-    return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
-}
-
 void flush_all_cache()
 {
     wbinvd();



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 3/4] VT-d: replace flush_all_cache()
  2022-01-27 14:46 [PATCH v2 0/4] (mainly) IOMMU hook adjustments Jan Beulich
  2022-01-27 14:47 ` [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
  2022-01-27 14:48 ` [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
@ 2022-01-27 14:49 ` Jan Beulich
  2022-01-28  9:33   ` Durrant, Paul
  2022-02-18  5:35   ` Tian, Kevin
  2022-01-27 14:49 ` [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure Jan Beulich
  3 siblings, 2 replies; 15+ messages in thread
From: Jan Beulich @ 2022-01-27 14:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Andrew Cooper, Kevin Tian

Let's use infrastructure we have available instead of an open-coded
wbinvd() invocation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -78,8 +78,6 @@ int __must_check qinval_device_iotlb_syn
                                           struct pci_dev *pdev,
                                           u16 did, u16 size, u64 addr);
 
-void flush_all_cache(void);
-
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -623,7 +623,8 @@ static int __must_check iommu_flush_all(
     bool_t flush_dev_iotlb;
     int rc = 0;
 
-    flush_all_cache();
+    flush_local(FLUSH_CACHE);
+
     for_each_drhd_unit ( drhd )
     {
         int context_rc, iotlb_rc;
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -46,8 +46,3 @@ void unmap_vtd_domain_page(const void *v
 {
     unmap_domain_page(va);
 }
-
-void flush_all_cache()
-{
-    wbinvd();
-}



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure
  2022-01-27 14:46 [PATCH v2 0/4] (mainly) IOMMU hook adjustments Jan Beulich
                   ` (2 preceding siblings ...)
  2022-01-27 14:49 ` [PATCH v2 3/4] VT-d: replace flush_all_cache() Jan Beulich
@ 2022-01-27 14:49 ` Jan Beulich
  2022-01-28  9:37   ` Durrant, Paul
  2022-02-18  5:42   ` Tian, Kevin
  3 siblings, 2 replies; 15+ messages in thread
From: Jan Beulich @ 2022-01-27 14:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Andrew Cooper, Kevin Tian

The VT-d hook can indicate an error, which shouldn't be ignored. Convert
the hook's return value to a proper error code, and let that bubble up.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I'm not convinced of the XSM related behavior here: It's neither clear
why the check gets performed on the possible further group members
instead of on the passed in device, nor - if the former is indeed
intended behavior - why the loop then simply gets continued instead of
returning an error. After all in such a case the reported "group" is
actually incomplete, which can't result in anything good.

I'm further unconvinced that it is correct for the AMD hook to never
return an error.
---
v2: New.

--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1463,6 +1463,8 @@ static int iommu_get_device_group(
         return 0;
 
     group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
+    if ( group_id < 0 )
+        return group_id;
 
     pcidevs_lock();
     for_each_pdev( d, pdev )
@@ -1477,6 +1479,12 @@ static int iommu_get_device_group(
             continue;
 
         sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
+        if ( sdev_id < 0 )
+        {
+            pcidevs_unlock();
+            return sdev_id;
+        }
+
         if ( (sdev_id == group_id) && (i < max_sdevs) )
         {
             bdf = (b << 16) | (df << 8);
@@ -1484,7 +1492,7 @@ static int iommu_get_device_group(
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
             {
                 pcidevs_unlock();
-                return -1;
+                return -EFAULT;
             }
             i++;
         }
@@ -1552,8 +1560,7 @@ int iommu_do_pci_domctl(
         ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
         if ( ret < 0 )
         {
-            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
-            ret = -EFAULT;
+            dprintk(XENLOG_ERR, "iommu_get_device_group() failed: %d\n", ret);
             domctl->u.get_device_group.num_sdevs = 0;
         }
         else
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2564,10 +2564,11 @@ static int intel_iommu_assign_device(
 static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
 {
     u8 secbus;
+
     if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 0 )
-        return -1;
-    else
-        return PCI_BDF2(bus, devfn);
+        return -ENODEV;
+
+    return PCI_BDF2(bus, devfn);
 }
 
 static int __must_check vtd_suspend(void)



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2022-01-27 14:47 ` [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
@ 2022-01-28  9:28   ` Durrant, Paul
  2022-01-28 10:36     ` Jan Beulich
  2022-01-28 10:40   ` Rahul Singh
  1 sibling, 1 reply; 15+ messages in thread
From: Durrant, Paul @ 2022-01-28  9:28 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Andrew Cooper

On 27/01/2022 14:47, Jan Beulich wrote:
> This is, once again, to limit the number of indirect calls as much as
> possible. The only hook invocation which isn't sensible to convert is
> setup(). And of course Arm-only use sites are left alone as well.
> 
> Note regarding the introduction / use of local variables in pci.c:
> struct pci_dev's involved fields are const. This const propagates, via
> typeof(), to the local helper variables in the altcall macros. These
> helper variables are, however, used as outputs (and hence can't be
> const). In iommu_get_device_group() make use of the new local variables
> to also simplify some adjacent code.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -198,7 +198,7 @@ int iommu_domain_init(struct domain *d,
>           return ret;
>   
>       hd->platform_ops = iommu_get_ops();
> -    ret = hd->platform_ops->init(d);
> +    ret = iommu_call(hd->platform_ops, init, d);
>       if ( ret || is_system_domain(d) )
>           return ret;
>   
> @@ -233,7 +233,7 @@ void __hwdom_init iommu_hwdom_init(struc
>   
>       register_keyhandler('o', &iommu_dump_page_tables, "dump iommu page tables", 0);
>   
> -    hd->platform_ops->hwdom_init(d);
> +    iommu_vcall(hd->platform_ops, hwdom_init, d);
>   }
>   
>   static void iommu_teardown(struct domain *d)
> @@ -576,7 +576,7 @@ int iommu_get_reserved_device_memory(iom
>       if ( !ops->get_reserved_device_memory )
>           return 0;
>   
> -    return ops->get_reserved_device_memory(func, ctxt);
> +    return iommu_call(ops, get_reserved_device_memory, func, ctxt);
>   }
>   
>   bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
> @@ -603,7 +603,7 @@ static void iommu_dump_page_tables(unsig
>               continue;
>           }
>   
> -        dom_iommu(d)->platform_ops->dump_page_tables(d);
> +        iommu_vcall(dom_iommu(d)->platform_ops, dump_page_tables, d);
>       }
>   
>       rcu_read_unlock(&domlist_read_lock);
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -861,15 +861,15 @@ static int deassign_device(struct domain
>           devfn += pdev->phantom_stride;
>           if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>               break;
> -        ret = hd->platform_ops->reassign_device(d, target, devfn,
> -                                                pci_to_dev(pdev));
> +        ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
> +                         pci_to_dev(pdev));
>           if ( ret )
>               goto out;
>       }
>   
>       devfn = pdev->devfn;
> -    ret = hd->platform_ops->reassign_device(d, target, devfn,
> -                                            pci_to_dev(pdev));
> +    ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
> +                     pci_to_dev(pdev));
>       if ( ret )
>           goto out;
>   
> @@ -1300,7 +1300,7 @@ static int iommu_add_device(struct pci_d
>   {
>       const struct domain_iommu *hd;
>       int rc;
> -    u8 devfn;
> +    unsigned int devfn = pdev->devfn;
>   
>       if ( !pdev->domain )
>           return -EINVAL;
> @@ -1311,16 +1311,16 @@ static int iommu_add_device(struct pci_d
>       if ( !is_iommu_enabled(pdev->domain) )
>           return 0;
>   
> -    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
> +    rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
>       if ( rc || !pdev->phantom_stride )
>           return rc;
>   
> -    for ( devfn = pdev->devfn ; ; )
> +    for ( ; ; )
>       {
>           devfn += pdev->phantom_stride;
>           if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>               return 0;
> -        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
> +        rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
>           if ( rc )
>               printk(XENLOG_WARNING "IOMMU: add %pp failed (%d)\n",
>                      &pdev->sbdf, rc);
> @@ -1341,7 +1341,7 @@ static int iommu_enable_device(struct pc
>            !hd->platform_ops->enable_device )
>           return 0;
>   
> -    return hd->platform_ops->enable_device(pci_to_dev(pdev));
> +    return iommu_call(hd->platform_ops, enable_device, pci_to_dev(pdev));
>   }
>   
>   static int iommu_remove_device(struct pci_dev *pdev)
> @@ -1363,7 +1363,8 @@ static int iommu_remove_device(struct pc
>           devfn += pdev->phantom_stride;
>           if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>               break;
> -        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
> +        rc = iommu_call(hd->platform_ops, remove_device, devfn,
> +                        pci_to_dev(pdev));
>           if ( !rc )
>               continue;
>   
> @@ -1371,7 +1372,9 @@ static int iommu_remove_device(struct pc
>           return rc;
>       }
>   
> -    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
> +    devfn = pdev->devfn;
> +
> +    return iommu_call(hd->platform_ops, remove_device, devfn, pci_to_dev(pdev));
>   }
>   
>   static int device_assigned(u16 seg, u8 bus, u8 devfn)
> @@ -1421,7 +1424,8 @@ static int assign_device(struct domain *
>   
>       pdev->fault.count = 0;
>   
> -    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
> +    if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
> +                          pci_to_dev(pdev), flag)) )
>           goto done;
>   
>       for ( ; pdev->phantom_stride; rc = 0 )
> @@ -1429,7 +1433,8 @@ static int assign_device(struct domain *
>           devfn += pdev->phantom_stride;
>           if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>               break;
> -        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
> +        rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
> +                        pci_to_dev(pdev), flag);
>       }
>   
>    done:
> @@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
>       if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
>           return 0;
>   
> -    group_id = ops->get_device_group_id(seg, bus, devfn);
> +    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
>   
>       pcidevs_lock();
>       for_each_pdev( d, pdev )
>       {
> -        if ( (pdev->seg != seg) ||
> -             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
> +        unsigned int b = pdev->bus;
> +        unsigned int df = pdev->devfn;
> +
> +        if ( (pdev->seg != seg) || ((b == bus) && (df == devfn)) )
>               continue;
>   
> -        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
> +        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (b << 8) | df) )
>               continue;
>   
> -        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
> +        sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
>           if ( (sdev_id == group_id) && (i < max_sdevs) )
>           {
> -            bdf = 0;
> -            bdf |= (pdev->bus & 0xff) << 16;
> -            bdf |= (pdev->devfn & 0xff) << 8;
> +            bdf = (b << 16) | (df << 8);

Don't we have a macro for this now? Probably best to start using it 
whilst modifying the code.

Anyway...

Reviewed-by: Paul Durrant <paul@xen.org>

>   
>               if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
>               {
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -145,7 +145,7 @@ unsigned int iommu_read_apic_from_ire(un
>   int __init iommu_setup_hpet_msi(struct msi_desc *msi)
>   {
>       const struct iommu_ops *ops = iommu_get_ops();
> -    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
> +    return ops->setup_hpet_msi ? iommu_call(ops, setup_hpet_msi, msi) : -ENODEV;
>   }
>   
>   void __hwdom_init arch_iommu_check_autotranslated_hwdom(struct domain *d)
> @@ -406,7 +406,7 @@ int iommu_free_pgtables(struct domain *d
>        * Pages will be moved to the free list below. So we want to
>        * clear the root page-table to avoid any potential use after-free.
>        */
> -    hd->platform_ops->clear_root_pgtable(d);
> +    iommu_vcall(hd->platform_ops, clear_root_pgtable, d);
>   
>       while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
>       {
> 



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing
  2022-01-27 14:48 ` [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
@ 2022-01-28  9:32   ` Durrant, Paul
  2022-02-18  5:34   ` Tian, Kevin
  1 sibling, 0 replies; 15+ messages in thread
From: Durrant, Paul @ 2022-01-28  9:32 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Paul Durrant, Kevin Tian, Andrew Cooper, Wei Liu, Roger Pau Monné

On 27/01/2022 14:48, Jan Beulich wrote:
> The actual function should always have lived in core x86 code; move it
> there, replacing get_cache_line_size() by readily available (except very
> early during boot; see the code comment) data. Also rename the function.
> 
> Drop the respective IOMMU hook, (re)introducing a respective boolean
> instead. Replace a true and an almost open-coding instance of
> iommu_sync_cache().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Paul Durrant <paul@xen.org>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/4] VT-d: replace flush_all_cache()
  2022-01-27 14:49 ` [PATCH v2 3/4] VT-d: replace flush_all_cache() Jan Beulich
@ 2022-01-28  9:33   ` Durrant, Paul
  2022-02-18  5:35   ` Tian, Kevin
  1 sibling, 0 replies; 15+ messages in thread
From: Durrant, Paul @ 2022-01-28  9:33 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Andrew Cooper, Kevin Tian

On 27/01/2022 14:49, Jan Beulich wrote:
> Let's use infrastructure we have available instead of an open-coded
> wbinvd() invocation.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Paul Durrant <paul@xen.org>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure
  2022-01-27 14:49 ` [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure Jan Beulich
@ 2022-01-28  9:37   ` Durrant, Paul
  2022-02-18  5:42   ` Tian, Kevin
  1 sibling, 0 replies; 15+ messages in thread
From: Durrant, Paul @ 2022-01-28  9:37 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Andrew Cooper, Kevin Tian

On 27/01/2022 14:49, Jan Beulich wrote:
> The VT-d hook can indicate an error, which shouldn't be ignored. Convert
> the hook's return value to a proper error code, and let that bubble up.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Paul Durrant <paul@xen.org>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2022-01-28  9:28   ` Durrant, Paul
@ 2022-01-28 10:36     ` Jan Beulich
  2022-01-28 10:54       ` Andrew Cooper
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2022-01-28 10:36 UTC (permalink / raw)
  To: paul; +Cc: Andrew Cooper, xen-devel

On 28.01.2022 10:28, Durrant, Paul wrote:
> On 27/01/2022 14:47, Jan Beulich wrote:
>> @@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
>>       if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
>>           return 0;
>>   
>> -    group_id = ops->get_device_group_id(seg, bus, devfn);
>> +    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
>>   
>>       pcidevs_lock();
>>       for_each_pdev( d, pdev )
>>       {
>> -        if ( (pdev->seg != seg) ||
>> -             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
>> +        unsigned int b = pdev->bus;
>> +        unsigned int df = pdev->devfn;
>> +
>> +        if ( (pdev->seg != seg) || ((b == bus) && (df == devfn)) )
>>               continue;
>>   
>> -        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
>> +        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (b << 8) | df) )
>>               continue;
>>   
>> -        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
>> +        sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
>>           if ( (sdev_id == group_id) && (i < max_sdevs) )
>>           {
>> -            bdf = 0;
>> -            bdf |= (pdev->bus & 0xff) << 16;
>> -            bdf |= (pdev->devfn & 0xff) << 8;
>> +            bdf = (b << 16) | (df << 8);
> 
> Don't we have a macro for this now? Probably best to start using it 
> whilst modifying the code.

We don't. And it would feel somewhat misleading to use PCI_BDF2(b, df) << 8
here. The situation is even worse imo: Besides there not being a macro, I
also cannot seem to find any documentation on this non-standard layout (BDF
shifted left by 8). Yet then again I also can't spot any caller of
xc_get_device_group() ...

> Reviewed-by: Paul Durrant <paul@xen.org>

Thanks.

Jan



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2022-01-27 14:47 ` [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
  2022-01-28  9:28   ` Durrant, Paul
@ 2022-01-28 10:40   ` Rahul Singh
  1 sibling, 0 replies; 15+ messages in thread
From: Rahul Singh @ 2022-01-28 10:40 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Paul Durrant, Andrew Cooper

Hi Jan,

> On 27 Jan 2022, at 2:47 pm, Jan Beulich <jbeulich@suse.com> wrote:
> 
> This is, once again, to limit the number of indirect calls as much as
> possible. The only hook invocation which isn't sensible to convert is
> setup(). And of course Arm-only use sites are left alone as well.
> 
> Note regarding the introduction / use of local variables in pci.c:
> struct pci_dev's involved fields are const. This const propagates, via
> typeof(), to the local helper variables in the altcall macros. These
> helper variables are, however, used as outputs (and hence can't be
> const). In iommu_get_device_group() make use of the new local variables
> to also simplify some adjacent code.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Rahul Singh <rahul.singh@arm.com>
Tested-by: Rahul Singh <rahul.singh@arm.com>

Regards,
Rahul
> 
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -198,7 +198,7 @@ int iommu_domain_init(struct domain *d,
>         return ret;
> 
>     hd->platform_ops = iommu_get_ops();
> -    ret = hd->platform_ops->init(d);
> +    ret = iommu_call(hd->platform_ops, init, d);
>     if ( ret || is_system_domain(d) )
>         return ret;
> 
> @@ -233,7 +233,7 @@ void __hwdom_init iommu_hwdom_init(struc
> 
>     register_keyhandler('o', &iommu_dump_page_tables, "dump iommu page tables", 0);
> 
> -    hd->platform_ops->hwdom_init(d);
> +    iommu_vcall(hd->platform_ops, hwdom_init, d);
> }
> 
> static void iommu_teardown(struct domain *d)
> @@ -576,7 +576,7 @@ int iommu_get_reserved_device_memory(iom
>     if ( !ops->get_reserved_device_memory )
>         return 0;
> 
> -    return ops->get_reserved_device_memory(func, ctxt);
> +    return iommu_call(ops, get_reserved_device_memory, func, ctxt);
> }
> 
> bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
> @@ -603,7 +603,7 @@ static void iommu_dump_page_tables(unsig
>             continue;
>         }
> 
> -        dom_iommu(d)->platform_ops->dump_page_tables(d);
> +        iommu_vcall(dom_iommu(d)->platform_ops, dump_page_tables, d);
>     }
> 
>     rcu_read_unlock(&domlist_read_lock);
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -861,15 +861,15 @@ static int deassign_device(struct domain
>         devfn += pdev->phantom_stride;
>         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>             break;
> -        ret = hd->platform_ops->reassign_device(d, target, devfn,
> -                                                pci_to_dev(pdev));
> +        ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
> +                         pci_to_dev(pdev));
>         if ( ret )
>             goto out;
>     }
> 
>     devfn = pdev->devfn;
> -    ret = hd->platform_ops->reassign_device(d, target, devfn,
> -                                            pci_to_dev(pdev));
> +    ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
> +                     pci_to_dev(pdev));
>     if ( ret )
>         goto out;
> 
> @@ -1300,7 +1300,7 @@ static int iommu_add_device(struct pci_d
> {
>     const struct domain_iommu *hd;
>     int rc;
> -    u8 devfn;
> +    unsigned int devfn = pdev->devfn;
> 
>     if ( !pdev->domain )
>         return -EINVAL;
> @@ -1311,16 +1311,16 @@ static int iommu_add_device(struct pci_d
>     if ( !is_iommu_enabled(pdev->domain) )
>         return 0;
> 
> -    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
> +    rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
>     if ( rc || !pdev->phantom_stride )
>         return rc;
> 
> -    for ( devfn = pdev->devfn ; ; )
> +    for ( ; ; )
>     {
>         devfn += pdev->phantom_stride;
>         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>             return 0;
> -        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
> +        rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
>         if ( rc )
>             printk(XENLOG_WARNING "IOMMU: add %pp failed (%d)\n",
>                    &pdev->sbdf, rc);
> @@ -1341,7 +1341,7 @@ static int iommu_enable_device(struct pc
>          !hd->platform_ops->enable_device )
>         return 0;
> 
> -    return hd->platform_ops->enable_device(pci_to_dev(pdev));
> +    return iommu_call(hd->platform_ops, enable_device, pci_to_dev(pdev));
> }
> 
> static int iommu_remove_device(struct pci_dev *pdev)
> @@ -1363,7 +1363,8 @@ static int iommu_remove_device(struct pc
>         devfn += pdev->phantom_stride;
>         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>             break;
> -        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
> +        rc = iommu_call(hd->platform_ops, remove_device, devfn,
> +                        pci_to_dev(pdev));
>         if ( !rc )
>             continue;
> 
> @@ -1371,7 +1372,9 @@ static int iommu_remove_device(struct pc
>         return rc;
>     }
> 
> -    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
> +    devfn = pdev->devfn;
> +
> +    return iommu_call(hd->platform_ops, remove_device, devfn, pci_to_dev(pdev));
> }
> 
> static int device_assigned(u16 seg, u8 bus, u8 devfn)
> @@ -1421,7 +1424,8 @@ static int assign_device(struct domain *
> 
>     pdev->fault.count = 0;
> 
> -    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
> +    if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
> +                          pci_to_dev(pdev), flag)) )
>         goto done;
> 
>     for ( ; pdev->phantom_stride; rc = 0 )
> @@ -1429,7 +1433,8 @@ static int assign_device(struct domain *
>         devfn += pdev->phantom_stride;
>         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
>             break;
> -        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
> +        rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
> +                        pci_to_dev(pdev), flag);
>     }
> 
>  done:
> @@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
>     if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
>         return 0;
> 
> -    group_id = ops->get_device_group_id(seg, bus, devfn);
> +    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
> 
>     pcidevs_lock();
>     for_each_pdev( d, pdev )
>     {
> -        if ( (pdev->seg != seg) ||
> -             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
> +        unsigned int b = pdev->bus;
> +        unsigned int df = pdev->devfn;
> +
> +        if ( (pdev->seg != seg) || ((b == bus) && (df == devfn)) )
>             continue;
> 
> -        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
> +        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (b << 8) | df) )
>             continue;
> 
> -        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
> +        sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
>         if ( (sdev_id == group_id) && (i < max_sdevs) )
>         {
> -            bdf = 0;
> -            bdf |= (pdev->bus & 0xff) << 16;
> -            bdf |= (pdev->devfn & 0xff) << 8;
> +            bdf = (b << 16) | (df << 8);
> 
>             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
>             {
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -145,7 +145,7 @@ unsigned int iommu_read_apic_from_ire(un
> int __init iommu_setup_hpet_msi(struct msi_desc *msi)
> {
>     const struct iommu_ops *ops = iommu_get_ops();
> -    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
> +    return ops->setup_hpet_msi ? iommu_call(ops, setup_hpet_msi, msi) : -ENODEV;
> }
> 
> void __hwdom_init arch_iommu_check_autotranslated_hwdom(struct domain *d)
> @@ -406,7 +406,7 @@ int iommu_free_pgtables(struct domain *d
>      * Pages will be moved to the free list below. So we want to
>      * clear the root page-table to avoid any potential use after-free.
>      */
> -    hd->platform_ops->clear_root_pgtable(d);
> +    iommu_vcall(hd->platform_ops, clear_root_pgtable, d);
> 
>     while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
>     {
> 
> 



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2022-01-28 10:36     ` Jan Beulich
@ 2022-01-28 10:54       ` Andrew Cooper
  0 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2022-01-28 10:54 UTC (permalink / raw)
  To: Jan Beulich, paul; +Cc: xen-devel

On 28/01/2022 10:36, Jan Beulich wrote:
> On 28.01.2022 10:28, Durrant, Paul wrote:
>> On 27/01/2022 14:47, Jan Beulich wrote:
>>> @@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
>>>       if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
>>>           return 0;
>>>   
>>> -    group_id = ops->get_device_group_id(seg, bus, devfn);
>>> +    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
>>>   
>>>       pcidevs_lock();
>>>       for_each_pdev( d, pdev )
>>>       {
>>> -        if ( (pdev->seg != seg) ||
>>> -             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
>>> +        unsigned int b = pdev->bus;
>>> +        unsigned int df = pdev->devfn;
>>> +
>>> +        if ( (pdev->seg != seg) || ((b == bus) && (df == devfn)) )
>>>               continue;
>>>   
>>> -        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
>>> +        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (b << 8) | df) )
>>>               continue;
>>>   
>>> -        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
>>> +        sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
>>>           if ( (sdev_id == group_id) && (i < max_sdevs) )
>>>           {
>>> -            bdf = 0;
>>> -            bdf |= (pdev->bus & 0xff) << 16;
>>> -            bdf |= (pdev->devfn & 0xff) << 8;
>>> +            bdf = (b << 16) | (df << 8);
>> Don't we have a macro for this now? Probably best to start using it 
>> whilst modifying the code.
> We don't. And it would feel somewhat misleading to use PCI_BDF2(b, df) << 8
> here. The situation is even worse imo: Besides there not being a macro, I
> also cannot seem to find any documentation on this non-standard layout (BDF
> shifted left by 8). Yet then again I also can't spot any caller of
> xc_get_device_group() ...

I'm sure I already did the archaeology.

device groups were broken by a hypercall bounce buffering change 2 years
before the only caller was dropped with Xend.

This mess of a hypercall has demonstrably not been used in a decade.  I
firmly suggest dropping it, rather than wasting effort trying to unbreak
an interface which needs deleting anyway as the first step to doing
IOMMU groups.

~Andrew

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing
  2022-01-27 14:48 ` [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
  2022-01-28  9:32   ` Durrant, Paul
@ 2022-02-18  5:34   ` Tian, Kevin
  1 sibling, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2022-02-18  5:34 UTC (permalink / raw)
  To: Beulich, Jan, xen-devel
  Cc: Paul Durrant, Cooper, Andrew, Wei Liu, Pau Monné, Roger

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Thursday, January 27, 2022 10:48 PM
> 
> The actual function should always have lived in core x86 code; move it
> there, replacing get_cache_line_size() by readily available (except very
> early during boot; see the code comment) data. Also rename the function.
> 
> Drop the respective IOMMU hook, (re)introducing a respective boolean
> instead. Replace a true and an almost open-coding instance of
> iommu_sync_cache().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
> Placing the function next to flush_area_local() exposes a curious
> asymmetry between the SFENCE placements: sync_cache() has it after the
> flush, while flush_area_local() has it before it. I think the latter one
> is misplaced.
> ---
> v2: Rename sync_cache() to cache_writeback().
> 
> --- a/xen/arch/x86/flushtlb.c
> +++ b/xen/arch/x86/flushtlb.c
> @@ -11,6 +11,7 @@
>  #include <xen/sched.h>
>  #include <xen/smp.h>
>  #include <xen/softirq.h>
> +#include <asm/cache.h>
>  #include <asm/flushtlb.h>
>  #include <asm/invpcid.h>
>  #include <asm/nops.h>
> @@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
>      return flags;
>  }
> 
> +void cache_writeback(const void *addr, unsigned int size)
> +{
> +    /*
> +     * This function may be called before current_cpu_data is established.
> +     * Hence a fallback is needed to prevent the loop below becoming infinite.
> +     */
> +    unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
> +    const void *end = addr + size;
> +
> +    addr -= (unsigned long)addr & (clflush_size - 1);
> +    for ( ; addr < end; addr += clflush_size )
> +    {
> +/*
> + * The arguments to a macro must not include preprocessor directives.
> Doing so
> + * results in undefined behavior, so we have to create some defines here in
> + * order to avoid it.
> + */
> +#if defined(HAVE_AS_CLWB)
> +# define CLWB_ENCODING "clwb %[p]"
> +#elif defined(HAVE_AS_XSAVEOPT)
> +# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
> +#else
> +# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax)
> */
> +#endif
> +
> +#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
> +#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
> +# define INPUT BASE_INPUT
> +#else
> +# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
> +#endif
> +        /*
> +         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
> +         * + prefix than a clflush + nop, and hence the prefix is added instead
> +         * of letting the alternative framework fill the gap by appending nops.
> +         */
> +        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
> +                         "data16 clflush %[p]", /* clflushopt */
> +                         X86_FEATURE_CLFLUSHOPT,
> +                         CLWB_ENCODING,
> +                         X86_FEATURE_CLWB, /* no outputs */,
> +                         INPUT(addr));
> +#undef INPUT
> +#undef BASE_INPUT
> +#undef CLWB_ENCODING
> +    }
> +
> +    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
> +                      "sfence", X86_FEATURE_CLWB);
> +}
> +
>  unsigned int guest_flush_tlb_flags(const struct domain *d)
>  {
>      bool shadow = paging_mode_shadow(d);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -240,54 +240,6 @@ domid_t did_to_domain_id(const struct vt
>      return iommu->domid_map[did];
>  }
> 
> -static void sync_cache(const void *addr, unsigned int size)
> -{
> -    static unsigned long clflush_size = 0;
> -    const void *end = addr + size;
> -
> -    if ( clflush_size == 0 )
> -        clflush_size = get_cache_line_size();
> -
> -    addr -= (unsigned long)addr & (clflush_size - 1);
> -    for ( ; addr < end; addr += clflush_size )
> -/*
> - * The arguments to a macro must not include preprocessor directives.
> Doing so
> - * results in undefined behavior, so we have to create some defines here in
> - * order to avoid it.
> - */
> -#if defined(HAVE_AS_CLWB)
> -# define CLWB_ENCODING "clwb %[p]"
> -#elif defined(HAVE_AS_XSAVEOPT)
> -# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
> -#else
> -# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
> -#endif
> -
> -#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
> -#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
> -# define INPUT BASE_INPUT
> -#else
> -# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
> -#endif
> -        /*
> -         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
> -         * + prefix than a clflush + nop, and hence the prefix is added instead
> -         * of letting the alternative framework fill the gap by appending nops.
> -         */
> -        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
> -                         "data16 clflush %[p]", /* clflushopt */
> -                         X86_FEATURE_CLFLUSHOPT,
> -                         CLWB_ENCODING,
> -                         X86_FEATURE_CLWB, /* no outputs */,
> -                         INPUT(addr));
> -#undef INPUT
> -#undef BASE_INPUT
> -#undef CLWB_ENCODING
> -
> -    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
> -                      "sfence", X86_FEATURE_CLWB);
> -}
> -
>  /* Allocate page table, return its machine address */
>  uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
>  {
> @@ -306,8 +258,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
> 
>          clear_page(vaddr);
> 
> -        if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
> -            sync_cache(vaddr, PAGE_SIZE);
> +        iommu_sync_cache(vaddr, PAGE_SIZE);
>          unmap_domain_page(vaddr);
>          cur_pg++;
>      }
> @@ -1327,7 +1278,7 @@ int __init iommu_alloc(struct acpi_drhd_
>      iommu->nr_pt_levels = agaw_to_level(agaw);
> 
>      if ( !ecap_coherent(iommu->ecap) )
> -        vtd_ops.sync_cache = sync_cache;
> +        iommu_non_coherent = true;
> 
>      nr_dom = cap_ndoms(iommu->cap);
> 
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -28,6 +28,7 @@
> 
>  const struct iommu_init_ops *__initdata iommu_init_ops;
>  struct iommu_ops __read_mostly iommu_ops;
> +bool __read_mostly iommu_non_coherent;
> 
>  enum iommu_intremap __read_mostly iommu_intremap =
> iommu_intremap_full;
> 
> @@ -438,8 +439,7 @@ struct page_info *iommu_alloc_pgtable(st
>      p = __map_domain_page(pg);
>      clear_page(p);
> 
> -    if ( hd->platform_ops->sync_cache )
> -        iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
> +    iommu_sync_cache(p, PAGE_SIZE);
> 
>      unmap_domain_page(p);
> 
> --- a/xen/arch/x86/include/asm/cache.h
> +++ b/xen/arch/x86/include/asm/cache.h
> @@ -11,4 +11,10 @@
> 
>  #define __read_mostly __section(".data.read_mostly")
> 
> +#ifndef __ASSEMBLY__
> +
> +void cache_writeback(const void *addr, unsigned int size);
> +
> +#endif
> +
>  #endif
> --- a/xen/arch/x86/include/asm/iommu.h
> +++ b/xen/arch/x86/include/asm/iommu.h
> @@ -19,6 +19,7 @@
>  #include <xen/mem_access.h>
>  #include <xen/spinlock.h>
>  #include <asm/apicdef.h>
> +#include <asm/cache.h>
>  #include <asm/processor.h>
>  #include <asm/hvm/vmx/vmcs.h>
> 
> @@ -134,12 +135,13 @@ extern bool untrusted_msi;
>  int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
>                     const uint8_t gvec);
> 
> -#define iommu_sync_cache(addr, size) ({                 \
> -    const struct iommu_ops *ops = iommu_get_ops();      \
> -                                                        \
> -    if ( ops->sync_cache )                              \
> -        iommu_vcall(ops, sync_cache, addr, size);       \
> -})
> +extern bool iommu_non_coherent;
> +
> +static inline void iommu_sync_cache(const void *addr, unsigned int size)
> +{
> +    if ( iommu_non_coherent )
> +        cache_writeback(addr, size);
> +}
> 
>  int __must_check iommu_free_pgtables(struct domain *d);
>  struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -268,7 +268,6 @@ struct iommu_ops {
>      int (*setup_hpet_msi)(struct msi_desc *);
> 
>      int (*adjust_irq_affinities)(void);
> -    void (*sync_cache)(const void *addr, unsigned int size);
>      void (*clear_root_pgtable)(struct domain *d);
>      int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg
> *msg);
>  #endif /* CONFIG_X86 */
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -78,7 +78,6 @@ int __must_check qinval_device_iotlb_syn
>                                            struct pci_dev *pdev,
>                                            u16 did, u16 size, u64 addr);
> 
> -unsigned int get_cache_line_size(void);
>  void flush_all_cache(void);
> 
>  uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
> --- a/xen/drivers/passthrough/vtd/x86/vtd.c
> +++ b/xen/drivers/passthrough/vtd/x86/vtd.c
> @@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *v
>      unmap_domain_page(va);
>  }
> 
> -unsigned int get_cache_line_size(void)
> -{
> -    return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
> -}
> -
>  void flush_all_cache()
>  {
>      wbinvd();


^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 3/4] VT-d: replace flush_all_cache()
  2022-01-27 14:49 ` [PATCH v2 3/4] VT-d: replace flush_all_cache() Jan Beulich
  2022-01-28  9:33   ` Durrant, Paul
@ 2022-02-18  5:35   ` Tian, Kevin
  1 sibling, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2022-02-18  5:35 UTC (permalink / raw)
  To: Beulich, Jan, xen-devel; +Cc: Paul Durrant, Cooper, Andrew

> From: Jan Beulich
> Sent: Thursday, January 27, 2022 10:49 PM
> 
> Let's use infrastructure we have available instead of an open-coded
> wbinvd() invocation.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> 
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -78,8 +78,6 @@ int __must_check qinval_device_iotlb_syn
>                                            struct pci_dev *pdev,
>                                            u16 did, u16 size, u64 addr);
> 
> -void flush_all_cache(void);
> -
>  uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
>  void free_pgtable_maddr(u64 maddr);
>  void *map_vtd_domain_page(u64 maddr);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -623,7 +623,8 @@ static int __must_check iommu_flush_all(
>      bool_t flush_dev_iotlb;
>      int rc = 0;
> 
> -    flush_all_cache();
> +    flush_local(FLUSH_CACHE);
> +
>      for_each_drhd_unit ( drhd )
>      {
>          int context_rc, iotlb_rc;
> --- a/xen/drivers/passthrough/vtd/x86/vtd.c
> +++ b/xen/drivers/passthrough/vtd/x86/vtd.c
> @@ -46,8 +46,3 @@ void unmap_vtd_domain_page(const void *v
>  {
>      unmap_domain_page(va);
>  }
> -
> -void flush_all_cache()
> -{
> -    wbinvd();
> -}
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure
  2022-01-27 14:49 ` [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure Jan Beulich
  2022-01-28  9:37   ` Durrant, Paul
@ 2022-02-18  5:42   ` Tian, Kevin
  1 sibling, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2022-02-18  5:42 UTC (permalink / raw)
  To: Beulich, Jan, xen-devel; +Cc: Paul Durrant, Cooper, Andrew

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Thursday, January 27, 2022 10:50 PM
> 
> The VT-d hook can indicate an error, which shouldn't be ignored. Convert
> the hook's return value to a proper error code, and let that bubble up.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> I'm not convinced of the XSM related behavior here: It's neither clear
> why the check gets performed on the possible further group members
> instead of on the passed in device, nor - if the former is indeed
> intended behavior - why the loop then simply gets continued instead of
> returning an error. After all in such a case the reported "group" is
> actually incomplete, which can't result in anything good.
> 
> I'm further unconvinced that it is correct for the AMD hook to never
> return an error.

I also have this question on the AMD side. Conceptually, that check should
be x86-vendor-agnostic.

But this change is good in its context:

	Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
> v2: New.
> 
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1463,6 +1463,8 @@ static int iommu_get_device_group(
>          return 0;
> 
>      group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
> +    if ( group_id < 0 )
> +        return group_id;
> 
>      pcidevs_lock();
>      for_each_pdev( d, pdev )
> @@ -1477,6 +1479,12 @@ static int iommu_get_device_group(
>              continue;
> 
>          sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
> +        if ( sdev_id < 0 )
> +        {
> +            pcidevs_unlock();
> +            return sdev_id;
> +        }
> +
>          if ( (sdev_id == group_id) && (i < max_sdevs) )
>          {
>              bdf = (b << 16) | (df << 8);
> @@ -1484,7 +1492,7 @@ static int iommu_get_device_group(
>              if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
>              {
>                  pcidevs_unlock();
> -                return -1;
> +                return -EFAULT;
>              }
>              i++;
>          }
> @@ -1552,8 +1560,7 @@ int iommu_do_pci_domctl(
>          ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
>          if ( ret < 0 )
>          {
> -            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
> -            ret = -EFAULT;
> +            dprintk(XENLOG_ERR, "iommu_get_device_group() failed: %d\n", ret);
>              domctl->u.get_device_group.num_sdevs = 0;
>          }
>          else
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2564,10 +2564,11 @@ static int intel_iommu_assign_device(
>  static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
>  {
>      u8 secbus;
> +
>      if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 0 )
> -        return -1;
> -    else
> -        return PCI_BDF2(bus, devfn);
> +        return -ENODEV;
> +
> +    return PCI_BDF2(bus, devfn);
>  }
> 
>  static int __must_check vtd_suspend(void)


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2022-02-18  5:43 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-27 14:46 [PATCH v2 0/4] (mainly) IOMMU hook adjustments Jan Beulich
2022-01-27 14:47 ` [PATCH v2 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
2022-01-28  9:28   ` Durrant, Paul
2022-01-28 10:36     ` Jan Beulich
2022-01-28 10:54       ` Andrew Cooper
2022-01-28 10:40   ` Rahul Singh
2022-01-27 14:48 ` [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
2022-01-28  9:32   ` Durrant, Paul
2022-02-18  5:34   ` Tian, Kevin
2022-01-27 14:49 ` [PATCH v2 3/4] VT-d: replace flush_all_cache() Jan Beulich
2022-01-28  9:33   ` Durrant, Paul
2022-02-18  5:35   ` Tian, Kevin
2022-01-27 14:49 ` [PATCH v2 4/4] IOMMU/PCI: propagate get_device_group_id() failure Jan Beulich
2022-01-28  9:37   ` Durrant, Paul
2022-02-18  5:42   ` Tian, Kevin

This is an external index of several public inboxes;
see the mirroring instructions on how to clone and mirror
all data and code used by this external index.