* [PATCH 0/4] (mainly) IOMMU hook adjustments
@ 2021-12-01  9:35 Jan Beulich
  2021-12-01  9:39 ` [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Jan Beulich @ 2021-12-01  9:35 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Andrew Cooper

Besides the altcall conversion, some further adjustments appeared desirable
while touching that area.

1: IOMMU/x86: switch to alternatives-call patching in further instances
2: VT-d / x86: re-arrange cache syncing
3: VT-d: replace flush_all_cache()
4: libxc: correct bounce direction in xc_get_device_group()

Jan




* [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2021-12-01  9:35 [PATCH 0/4] (mainly) IOMMU hook adjustments Jan Beulich
@ 2021-12-01  9:39 ` Jan Beulich
  2021-12-01 15:13   ` Andrew Cooper
  2021-12-01  9:40 ` [PATCH 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2021-12-01  9:39 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Andrew Cooper

This is, once again, to limit the number of indirect calls as much as
possible. The only hook invocation which isn't sensible to convert is
setup(). And of course Arm-only use sites are left alone as well.

Note regarding the introduction / use of local variables in pci.c:
struct pci_dev's involved fields are const. This const propagates, via
typeof(), to the local helper variables in the altcall macros. These
helper variables are, however, used as outputs (and hence can't be
const). In iommu_get_device_group() make use of the new local variables
to also simplify some adjacent code.
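
As a schematic illustration of the issue (illustrative names only, not
the real altcall macros):

    void demo(void)
    {
        const unsigned int cdevfn = 5;   /* stands in for pdev->devfn */
        typeof(cdevfn) h1 = cdevfn;      /* h1 inherits const, so using it
                                            as an output wouldn't compile */
        unsigned int devfn = cdevfn;     /* plain local, as introduced here */
        typeof(devfn) h2 = devfn;        /* non-const, usable as an output */

        h2 = 0;
        (void)h1; (void)h2;
    }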

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -198,7 +198,7 @@ int iommu_domain_init(struct domain *d,
         return ret;
 
     hd->platform_ops = iommu_get_ops();
-    ret = hd->platform_ops->init(d);
+    ret = iommu_call(hd->platform_ops, init, d);
     if ( ret || is_system_domain(d) )
         return ret;
 
@@ -233,7 +233,7 @@ void __hwdom_init iommu_hwdom_init(struc
 
     register_keyhandler('o', &iommu_dump_page_tables, "dump iommu page tables", 0);
 
-    hd->platform_ops->hwdom_init(d);
+    iommu_vcall(hd->platform_ops, hwdom_init, d);
 }
 
 static void iommu_teardown(struct domain *d)
@@ -576,7 +576,7 @@ int iommu_get_reserved_device_memory(iom
     if ( !ops->get_reserved_device_memory )
         return 0;
 
-    return ops->get_reserved_device_memory(func, ctxt);
+    return iommu_call(ops, get_reserved_device_memory, func, ctxt);
 }
 
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
@@ -603,7 +603,7 @@ static void iommu_dump_page_tables(unsig
             continue;
         }
 
-        dom_iommu(d)->platform_ops->dump_page_tables(d);
+        iommu_vcall(dom_iommu(d)->platform_ops, dump_page_tables, d);
     }
 
     rcu_read_unlock(&domlist_read_lock);
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -861,15 +861,15 @@ static int deassign_device(struct domain
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             break;
-        ret = hd->platform_ops->reassign_device(d, target, devfn,
-                                                pci_to_dev(pdev));
+        ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
+                         pci_to_dev(pdev));
         if ( ret )
             goto out;
     }
 
     devfn = pdev->devfn;
-    ret = hd->platform_ops->reassign_device(d, target, devfn,
-                                            pci_to_dev(pdev));
+    ret = iommu_call(hd->platform_ops, reassign_device, d, target, devfn,
+                     pci_to_dev(pdev));
     if ( ret )
         goto out;
 
@@ -1300,7 +1300,7 @@ static int iommu_add_device(struct pci_d
 {
     const struct domain_iommu *hd;
     int rc;
-    u8 devfn;
+    unsigned int devfn = pdev->devfn;
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -1311,16 +1311,16 @@ static int iommu_add_device(struct pci_d
     if ( !is_iommu_enabled(pdev->domain) )
         return 0;
 
-    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
+    rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
     if ( rc || !pdev->phantom_stride )
         return rc;
 
-    for ( devfn = pdev->devfn ; ; )
+    for ( ; ; )
     {
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             return 0;
-        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
+        rc = iommu_call(hd->platform_ops, add_device, devfn, pci_to_dev(pdev));
         if ( rc )
             printk(XENLOG_WARNING "IOMMU: add %pp failed (%d)\n",
                    &pdev->sbdf, rc);
@@ -1341,7 +1341,7 @@ static int iommu_enable_device(struct pc
          !hd->platform_ops->enable_device )
         return 0;
 
-    return hd->platform_ops->enable_device(pci_to_dev(pdev));
+    return iommu_call(hd->platform_ops, enable_device, pci_to_dev(pdev));
 }
 
 static int iommu_remove_device(struct pci_dev *pdev)
@@ -1363,7 +1363,8 @@ static int iommu_remove_device(struct pc
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             break;
-        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
+        rc = iommu_call(hd->platform_ops, remove_device, devfn,
+                        pci_to_dev(pdev));
         if ( !rc )
             continue;
 
@@ -1371,7 +1372,9 @@ static int iommu_remove_device(struct pc
         return rc;
     }
 
-    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
+    devfn = pdev->devfn;
+
+    return iommu_call(hd->platform_ops, remove_device, devfn, pci_to_dev(pdev));
 }
 
 static int device_assigned(u16 seg, u8 bus, u8 devfn)
@@ -1421,7 +1424,8 @@ static int assign_device(struct domain *
 
     pdev->fault.count = 0;
 
-    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
+    if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
+                          pci_to_dev(pdev), flag)) )
         goto done;
 
     for ( ; pdev->phantom_stride; rc = 0 )
@@ -1429,7 +1433,8 @@ static int assign_device(struct domain *
         devfn += pdev->phantom_stride;
         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
             break;
-        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
+        rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
+                        pci_to_dev(pdev), flag);
     }
 
  done:
@@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
     if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
         return 0;
 
-    group_id = ops->get_device_group_id(seg, bus, devfn);
+    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
 
     pcidevs_lock();
     for_each_pdev( d, pdev )
     {
-        if ( (pdev->seg != seg) ||
-             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
+        unsigned int b = pdev->bus;
+        unsigned int df = pdev->devfn;
+
+        if ( (pdev->seg != seg) || ((b == bus) && (df == devfn)) )
             continue;
 
-        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
+        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (b << 8) | df) )
             continue;
 
-        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
+        sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
         if ( (sdev_id == group_id) && (i < max_sdevs) )
         {
-            bdf = 0;
-            bdf |= (pdev->bus & 0xff) << 16;
-            bdf |= (pdev->devfn & 0xff) << 8;
+            bdf = (b << 16) | (df << 8);
 
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
             {
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -142,7 +142,7 @@ unsigned int iommu_read_apic_from_ire(un
 int __init iommu_setup_hpet_msi(struct msi_desc *msi)
 {
     const struct iommu_ops *ops = iommu_get_ops();
-    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
+    return ops->setup_hpet_msi ? iommu_call(ops, setup_hpet_msi, msi) : -ENODEV;
 }
 
 void __hwdom_init arch_iommu_check_autotranslated_hwdom(struct domain *d)
@@ -403,7 +403,7 @@ int iommu_free_pgtables(struct domain *d
      * Pages will be moved to the free list below. So we want to
      * clear the root page-table to avoid any potential use after-free.
      */
-    hd->platform_ops->clear_root_pgtable(d);
+    iommu_vcall(hd->platform_ops, clear_root_pgtable, d);
 
     while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
     {




* [PATCH 2/4] VT-d / x86: re-arrange cache syncing
  2021-12-01  9:35 [PATCH 0/4] (mainly) IOMMU hook adjustments Jan Beulich
  2021-12-01  9:39 ` [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
@ 2021-12-01  9:40 ` Jan Beulich
  2021-12-01 14:39   ` Andrew Cooper
  2021-12-01  9:41 ` [PATCH 3/4] VT-d: replace flush_all_cache() Jan Beulich
  2021-12-01  9:42 ` [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group() Jan Beulich
  3 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2021-12-01  9:40 UTC (permalink / raw)
  To: xen-devel
  Cc: Paul Durrant, Andrew Cooper, Wei Liu, Roger Pau Monné, Kevin Tian

The actual function should always have lived in core x86 code; move it
there, replacing get_cache_line_size() by readily available (except very
early during boot; see the code comment) data.

Drop the respective IOMMU hook, (re)introducing a corresponding boolean
instead. Replace one truly and one almost open-coded instance of
iommu_sync_cache().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Placing the function next to flush_area_local() exposes a curious
asymmetry between the SFENCE placements: sync_cache() has it after the
flush, while flush_area_local() has it before it. I think the latter one
is misplaced.
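
Schematically (illustration only, restating the observation above):

    sync_cache():        clflush/clflushopt/clwb loop; sfence
    flush_area_local():  sfence; clflushopt loop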

--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -11,6 +11,7 @@
 #include <xen/sched.h>
 #include <xen/smp.h>
 #include <xen/softirq.h>
+#include <asm/cache.h>
 #include <asm/flushtlb.h>
 #include <asm/invpcid.h>
 #include <asm/nops.h>
@@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
     return flags;
 }
 
+void sync_cache(const void *addr, unsigned int size)
+{
+    /*
+     * This function may be called before current_cpu_data is established.
+     * Hence a fallback is needed to prevent the loop below becoming infinite.
+     */
+    unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
+    const void *end = addr + size;
+
+    addr -= (unsigned long)addr & (clflush_size - 1);
+    for ( ; addr < end; addr += clflush_size )
+    {
+/*
+ * The arguments to a macro must not include preprocessor directives. Doing so
+ * results in undefined behavior, so we have to create some defines here in
+ * order to avoid it.
+ */
+#if defined(HAVE_AS_CLWB)
+# define CLWB_ENCODING "clwb %[p]"
+#elif defined(HAVE_AS_XSAVEOPT)
+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
+#else
+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
+#endif
+
+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
+# define INPUT BASE_INPUT
+#else
+# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
+#endif
+        /*
+         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
+         * + prefix than a clflush + nop, and hence the prefix is added instead
+         * of letting the alternative framework fill the gap by appending nops.
+         */
+        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
+                         "data16 clflush %[p]", /* clflushopt */
+                         X86_FEATURE_CLFLUSHOPT,
+                         CLWB_ENCODING,
+                         X86_FEATURE_CLWB, /* no outputs */,
+                         INPUT(addr));
+#undef INPUT
+#undef BASE_INPUT
+#undef CLWB_ENCODING
+    }
+
+    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
+                      "sfence", X86_FEATURE_CLWB);
+}
+
 unsigned int guest_flush_tlb_flags(const struct domain *d)
 {
     bool shadow = paging_mode_shadow(d);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -202,54 +202,6 @@ static void check_cleanup_domid_map(stru
     }
 }
 
-static void sync_cache(const void *addr, unsigned int size)
-{
-    static unsigned long clflush_size = 0;
-    const void *end = addr + size;
-
-    if ( clflush_size == 0 )
-        clflush_size = get_cache_line_size();
-
-    addr -= (unsigned long)addr & (clflush_size - 1);
-    for ( ; addr < end; addr += clflush_size )
-/*
- * The arguments to a macro must not include preprocessor directives. Doing so
- * results in undefined behavior, so we have to create some defines here in
- * order to avoid it.
- */
-#if defined(HAVE_AS_CLWB)
-# define CLWB_ENCODING "clwb %[p]"
-#elif defined(HAVE_AS_XSAVEOPT)
-# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
-#else
-# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
-#endif
-
-#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
-#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
-# define INPUT BASE_INPUT
-#else
-# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
-#endif
-        /*
-         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
-         * + prefix than a clflush + nop, and hence the prefix is added instead
-         * of letting the alternative framework fill the gap by appending nops.
-         */
-        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
-                         "data16 clflush %[p]", /* clflushopt */
-                         X86_FEATURE_CLFLUSHOPT,
-                         CLWB_ENCODING,
-                         X86_FEATURE_CLWB, /* no outputs */,
-                         INPUT(addr));
-#undef INPUT
-#undef BASE_INPUT
-#undef CLWB_ENCODING
-
-    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
-                      "sfence", X86_FEATURE_CLWB);
-}
-
 /* Allocate page table, return its machine address */
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
 {
@@ -268,8 +220,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
 
         clear_page(vaddr);
 
-        if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
-            sync_cache(vaddr, PAGE_SIZE);
+        iommu_sync_cache(vaddr, PAGE_SIZE);
         unmap_domain_page(vaddr);
         cur_pg++;
     }
@@ -1295,7 +1246,7 @@ int __init iommu_alloc(struct acpi_drhd_
     iommu->nr_pt_levels = agaw_to_level(agaw);
 
     if ( !ecap_coherent(iommu->ecap) )
-        vtd_ops.sync_cache = sync_cache;
+        iommu_non_coherent = true;
 
     /* allocate domain id bitmap */
     nr_dom = cap_ndoms(iommu->cap);
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -28,6 +28,7 @@
 
 const struct iommu_init_ops *__initdata iommu_init_ops;
 struct iommu_ops __read_mostly iommu_ops;
+bool __read_mostly iommu_non_coherent;
 
 enum iommu_intremap __read_mostly iommu_intremap = iommu_intremap_full;
 
@@ -435,8 +436,7 @@ struct page_info *iommu_alloc_pgtable(st
     p = __map_domain_page(pg);
     clear_page(p);
 
-    if ( hd->platform_ops->sync_cache )
-        iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
+    iommu_sync_cache(p, PAGE_SIZE);
 
     unmap_domain_page(p);
 
--- a/xen/include/asm-x86/cache.h
+++ b/xen/include/asm-x86/cache.h
@@ -11,4 +11,10 @@
 
 #define __read_mostly __section(".data.read_mostly")
 
+#ifndef __ASSEMBLY__
+
+void sync_cache(const void *addr, unsigned int size);
+
+#endif
+
 #endif
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -19,6 +19,7 @@
 #include <xen/mem_access.h>
 #include <xen/spinlock.h>
 #include <asm/apicdef.h>
+#include <asm/cache.h>
 #include <asm/processor.h>
 #include <asm/hvm/vmx/vmcs.h>
 
@@ -134,12 +135,13 @@ extern bool untrusted_msi;
 int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
                    const uint8_t gvec);
 
-#define iommu_sync_cache(addr, size) ({                 \
-    const struct iommu_ops *ops = iommu_get_ops();      \
-                                                        \
-    if ( ops->sync_cache )                              \
-        iommu_vcall(ops, sync_cache, addr, size);       \
-})
+extern bool iommu_non_coherent;
+
+static inline void iommu_sync_cache(const void *addr, unsigned int size)
+{
+    if ( iommu_non_coherent )
+        sync_cache(addr, size);
+}
 
 int __must_check iommu_free_pgtables(struct domain *d);
 struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -268,7 +268,6 @@ struct iommu_ops {
     int (*setup_hpet_msi)(struct msi_desc *);
 
     int (*adjust_irq_affinities)(void);
-    void (*sync_cache)(const void *addr, unsigned int size);
     void (*clear_root_pgtable)(struct domain *d);
     int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
 #endif /* CONFIG_X86 */
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_syn
                                           struct pci_dev *pdev,
                                           u16 did, u16 size, u64 addr);
 
-unsigned int get_cache_line_size(void);
 void flush_all_cache(void);
 
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *v
     unmap_domain_page(va);
 }
 
-unsigned int get_cache_line_size(void)
-{
-    return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
-}
-
 void flush_all_cache()
 {
     wbinvd();




* [PATCH 3/4] VT-d: replace flush_all_cache()
  2021-12-01  9:35 [PATCH 0/4] (mainly) IOMMU hook adjustments Jan Beulich
  2021-12-01  9:39 ` [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
  2021-12-01  9:40 ` [PATCH 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
@ 2021-12-01  9:41 ` Jan Beulich
  2021-12-01 13:02   ` Andrew Cooper
  2021-12-01  9:42 ` [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group() Jan Beulich
  3 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2021-12-01  9:41 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Kevin Tian

Let's use infrastructure we have available instead of an open-coded
wbinvd() invocation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -76,8 +76,6 @@ int __must_check qinval_device_iotlb_syn
                                           struct pci_dev *pdev,
                                           u16 did, u16 size, u64 addr);
 
-void flush_all_cache(void);
-
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -591,7 +591,8 @@ static int __must_check iommu_flush_all(
     bool_t flush_dev_iotlb;
     int rc = 0;
 
-    flush_all_cache();
+    flush_local(FLUSH_CACHE);
+
     for_each_drhd_unit ( drhd )
     {
         int context_rc, iotlb_rc;
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -46,8 +46,3 @@ void unmap_vtd_domain_page(const void *v
 {
     unmap_domain_page(va);
 }
-
-void flush_all_cache()
-{
-    wbinvd();
-}




* [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group()
  2021-12-01  9:35 [PATCH 0/4] (mainly) IOMMU hook adjustments Jan Beulich
                   ` (2 preceding siblings ...)
  2021-12-01  9:41 ` [PATCH 3/4] VT-d: replace flush_all_cache() Jan Beulich
@ 2021-12-01  9:42 ` Jan Beulich
  2021-12-01 12:11   ` Juergen Gross
  2021-12-01 15:11   ` Andrew Cooper
  3 siblings, 2 replies; 16+ messages in thread
From: Jan Beulich @ 2021-12-01  9:42 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Ian Jackson, Wei Liu, Juergen Gross

The array of IDs is an output.

Fixes: 79647c5bc9c6 ("libxc: convert domctl interfaces over to hypercall buffers")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Clearly the function, including its Python wrapper, cannot have been
used by anything for many years. I wonder whether that isn't good enough
a reason to sanitize the layout of the array elements: Right now they
have BDF in bits 8...23, when conventionally this would be bits 0...15.
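
For illustration, decoding one returned element under the current layout
(hypothetical helper; the layout matches the bdf = (b << 16) | (df << 8)
construction visible in patch 1):

    static void decode_group_member(uint32_t id,
                                    unsigned int *bus, unsigned int *devfn)
    {
        *bus   = (id >> 16) & 0xff;
        *devfn = (id >>  8) & 0xff;
    }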

--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -1546,7 +1546,8 @@ int xc_get_device_group(
 {
     int rc;
     DECLARE_DOMCTL;
-    DECLARE_HYPERCALL_BOUNCE(sdev_array, max_sdevs * sizeof(*sdev_array), XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    DECLARE_HYPERCALL_BOUNCE(sdev_array, max_sdevs * sizeof(*sdev_array),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
 
     if ( xc_hypercall_bounce_pre(xch, sdev_array) )
     {




* Re: [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group()
  2021-12-01  9:42 ` [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group() Jan Beulich
@ 2021-12-01 12:11   ` Juergen Gross
  2021-12-01 15:11   ` Andrew Cooper
  1 sibling, 0 replies; 16+ messages in thread
From: Juergen Gross @ 2021-12-01 12:11 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Ian Jackson, Wei Liu



On 01.12.21 10:42, Jan Beulich wrote:
> The array of IDs is an output.
> 
> Fixes: 79647c5bc9c6 ("libxc: convert domctl interfaces over to hypercall buffers")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen



* Re: [PATCH 3/4] VT-d: replace flush_all_cache()
  2021-12-01  9:41 ` [PATCH 3/4] VT-d: replace flush_all_cache() Jan Beulich
@ 2021-12-01 13:02   ` Andrew Cooper
  2021-12-02  8:47     ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2021-12-01 13:02 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Kevin Tian

On 01/12/2021 09:41, Jan Beulich wrote:
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -591,7 +591,8 @@ static int __must_check iommu_flush_all(
>      bool_t flush_dev_iotlb;
>      int rc = 0;
>  
> -    flush_all_cache();
> +    flush_local(FLUSH_CACHE);

While I agree that the conversion is an improvement, the logic still
looks totally bogus.

I can believe that it might have been a stopgap to fix problems before
we identified the need for sync_cache() for non-coherent IOMMUs, but
there's no need I can spot for any WBINVDs on any of these paths.

I'm fairly sure this should just be dropped, and Xen will get faster as
a result.

~Andrew



* Re: [PATCH 2/4] VT-d / x86: re-arrange cache syncing
  2021-12-01  9:40 ` [PATCH 2/4] VT-d / x86: re-arrange cache syncing Jan Beulich
@ 2021-12-01 14:39   ` Andrew Cooper
  2021-12-02  9:19     ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2021-12-01 14:39 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Paul Durrant, Andrew Cooper, Wei Liu, Roger Pau Monné, Kevin Tian

On 01/12/2021 09:40, Jan Beulich wrote:
> The actual function should always have lived in core x86 code; move it
> there, replacing get_cache_line_size() by readily available (except very
> early during boot; see the code comment) data.
>
> Drop the respective IOMMU hook, (re)introducing a corresponding boolean
> instead. Replace one truly and one almost open-coded instance of
> iommu_sync_cache().

Coherency (or not) is a per-IOMMU property, not a global platform
property.  iGPU IOMMUs are very different to those in the uncore, and
there's no reason to presume that IOMMUs in the PCH would have the same
properties as those in the uncore.

Given how expensive sync_cache() is, we cannot afford to be using it for
coherent IOMMUs in a mixed system.

This wants to be a boolean in arch_iommu.
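
As a rough sketch of that suggestion (hypothetical shape only; the field
name below is made up, and callers would need adjusting to pass the
per-domain state):

    struct arch_iommu /* sketch */ {
        /* ... existing fields ... */
        bool non_coherent;  /* any non-coherent IOMMU serving this domain */
    };

    static inline void iommu_sync_cache(const struct domain_iommu *hd,
                                        const void *addr, unsigned int size)
    {
        if ( hd->arch.non_coherent )  /* pay the sync cost only where needed */
            sync_cache(addr, size);
    }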


> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Placing the function next to flush_area_local() exposes a curious
> asymmetry between the SFENCE placements: sync_cache() has it after the
> flush, while flush_area_local() has it before it. I think the latter one
> is misplaced.

Wow this is a mess.

First and foremost, AMD state that on pre-CLFLUSHOPT parts, CLFLUSH is
unordered with ~anything (including SFENCE), and needs bounding with
MFENCE on both sides.  We definitely aren't doing this correctly right now.


AMD explicitly states that SFENCE drains the store and WC buffers (i.e.
make stuff instantaneously globally visible).  Intel does not, and
merely guarantees ordering.

A leading SFENCE would only make sense if there were WC concerns, but
both manuals say that the memory type doesn't matter, so I can't see a
justification for it.

What does matter, from the IOMMU's point of view, is that the line has
been written back (or evicted on pre-CLWB parts) before the IOTLB
invalidation occurs.  The invalidation will be a write to a different
address, which is why the trailing SFENCE is necessary, as CLFLUSHOPT
isn't ordered with respect to unaliasing writes.
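
For pre-CLFLUSHOPT parts that would mean something like the below
(minimal sketch under the stated assumptions; 64 is an assumed line
size, where real code would use the CPUID-derived value):

    static void clflush_range_legacy(const void *addr, unsigned long size)
    {
        const char *p = (const char *)((unsigned long)addr & ~63UL);
        const char *end = (const char *)addr + size;

        asm volatile ( "mfence" ::: "memory" );  /* order prior stores first */
        for ( ; p < end; p += 64 )
            asm volatile ( "clflush %0" :: "m" (*p) );
        asm volatile ( "mfence" ::: "memory" );  /* flushes done before later writes */
    }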


On a more minor note, both Intel and AMD say that CLFLUSH* are permitted
to target an execute-only code segment, so we need a fix in
hvmemul_cache_op()'s use of hvmemul_virtual_to_linear(...,
hvm_access_read, ...) which will currently #GP in this case.

>
> --- a/xen/arch/x86/flushtlb.c
> +++ b/xen/arch/x86/flushtlb.c
> @@ -11,6 +11,7 @@
>  #include <xen/sched.h>
>  #include <xen/smp.h>
>  #include <xen/softirq.h>
> +#include <asm/cache.h>
>  #include <asm/flushtlb.h>
>  #include <asm/invpcid.h>
>  #include <asm/nops.h>
> @@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
>      return flags;
>  }
>  
> +void sync_cache(const void *addr, unsigned int size)

Can we name this cache_writeback()?  sync is very generic, and it really
doesn't want confusing with cache_flush().

~Andrew



* Re: [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group()
  2021-12-01  9:42 ` [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group() Jan Beulich
  2021-12-01 12:11   ` Juergen Gross
@ 2021-12-01 15:11   ` Andrew Cooper
  2021-12-02  7:30     ` Jan Beulich
  1 sibling, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2021-12-01 15:11 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Ian Jackson, Wei Liu, Juergen Gross

On 01/12/2021 09:42, Jan Beulich wrote:
> The array of IDs is an output.
>
> Fixes: 79647c5bc9c6 ("libxc: convert domctl interfaces over to hypercall buffers")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Clearly the function, including its Python wrapper, cannot have been
> used by anything for many years. I wonder whether that isn't good enough
> a reason to sanitize the layout of the array elements: Right now they
> have BDF in bits 8...23, when conventionally this would be bits 0...15.

There is a lot of WTF with this hypercall.  It's obviously an attempt to
do the thing that Linux calls IOMMU groups now, except that the correct
way to do this would be for the group ID to be the unit of
assignment/deassignment.  (We need to do this anyway for other reasons.)

The last user was deleted with Xend (2013), which suggests that it was
broken for 3 years due to the incorrect bounce direction (2010).

Furthermore, it will arbitrarily fail if targeting domains without an
IOMMU configured, but everything else seems to be invariant of the
passed domain.  This should clearly be a sysctl, not a domctl.


I suggest ripping all of this infrastructure out.  It's clearly unused
(and broken in Xen too - see patch 1), and not something which should be
used in this form in the future.

~Andrew



* Re: [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2021-12-01  9:39 ` [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances Jan Beulich
@ 2021-12-01 15:13   ` Andrew Cooper
  2021-12-02  7:24     ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2021-12-01 15:13 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Andrew Cooper

On 01/12/2021 09:39, Jan Beulich wrote:
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
>      if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
>          return 0;
>  
> -    group_id = ops->get_device_group_id(seg, bus, devfn);
> +    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);

So I was going to suggest adjusting this to use more pci_sbdf_t, but the
Intel implementation can fail and return -1.

However, given the observations in patch 4, I'd instead recommend
dropping all of this.

Everything unrelated to get_device_group() looks fine.

~Andrew



* Re: [PATCH 1/4] IOMMU/x86: switch to alternatives-call patching in further instances
  2021-12-01 15:13   ` Andrew Cooper
@ 2021-12-02  7:24     ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2021-12-02  7:24 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Paul Durrant, Andrew Cooper, xen-devel

On 01.12.2021 16:13, Andrew Cooper wrote:
> On 01/12/2021 09:39, Jan Beulich wrote:
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -1457,24 +1462,24 @@ static int iommu_get_device_group(
>>      if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
>>          return 0;
>>  
>> -    group_id = ops->get_device_group_id(seg, bus, devfn);
>> +    group_id = iommu_call(ops, get_device_group_id, seg, bus, devfn);
> 
> So I was going to suggest adjusting this to use more pci_sbdf_t, but the
> Intel implementation can fail and return -1.

How are the two aspects related? Wouldn't you mean the parameter of the
function to become pci_sbdf_t? If so, I'd view this as an orthogonal
change. I'll reply to the question of removal on the subthread of patch 4.

Jan




* Re: [PATCH 4/4] libxc: correct bounce direction in xc_get_device_group()
  2021-12-01 15:11   ` Andrew Cooper
@ 2021-12-02  7:30     ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2021-12-02  7:30 UTC (permalink / raw)
  To: Andrew Cooper, Ian Jackson, Wei Liu, Paul Durrant
  Cc: xen-devel, Juergen Gross

On 01.12.2021 16:11, Andrew Cooper wrote:
> On 01/12/2021 09:42, Jan Beulich wrote:
>> The array of IDs is an output.
>>
>> Fixes: 79647c5bc9c6 ("libxc: convert domctl interfaces over to hypercall buffers")
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Clearly the function, including its Python wrapper, cannot have been
>> used by anything for many years. I wonder whether that isn't good enough
>> a reason to sanitize the layout of the array elements: Right now they
>> have BDF in bits 8...23, when conventionally this would be bits 0...15.
> 
> There is a lot of WTF with this hypercall.  It's obviously an attempt to
> do the thing that Linux calls IOMMU groups now, except that the correct
> way to do this would be for the group ID to be the unit of
> assignment/deassignment.  (We need to do this anyway for other reasons.)
> 
> The last user was deleted with Xend (2013), which suggests that it was
> broken for 3 years due to the incorrect bounce direction (2010).
> 
> Furthermore, it will arbitrarily fail if targeting domains without an
> IOMMU configured, but everything else seems to be invariant of the
> passed domain.  This should clearly be a sysctl, not a domctl.
> 
> 
> I suggest ripping all of this infrastructure out.  It's clearly unused
> (and broken in Xen too - see patch 1),

I've not seen you point out any breakage in reply to patch 1, unless you
mean VT-d's returning of -1 (which you didn't point out as broken, but
which I can see would lead to misbehavior).

> and not something which should be used in this form in the future.

I didn't think the concept here was wrong. What's missing is the tool
stack actually making use of this plus a way to do assignment in groups.
Iirc the latter was something Paul had started work on before leaving
Citrix? (That's leaving aside the bug you mention plus potential further
ones.)

Paul, Ian, Wei - what are your thoughts towards Andrew's proposal?

Jan




* Re: [PATCH 3/4] VT-d: replace flush_all_cache()
  2021-12-01 13:02   ` Andrew Cooper
@ 2021-12-02  8:47     ` Jan Beulich
  2021-12-24  7:28       ` Tian, Kevin
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2021-12-02  8:47 UTC (permalink / raw)
  To: Andrew Cooper, Kevin Tian; +Cc: Paul Durrant, xen-devel

On 01.12.2021 14:02, Andrew Cooper wrote:
> On 01/12/2021 09:41, Jan Beulich wrote:
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -591,7 +591,8 @@ static int __must_check iommu_flush_all(
>>      bool_t flush_dev_iotlb;
>>      int rc = 0;
>>  
>> -    flush_all_cache();
>> +    flush_local(FLUSH_CACHE);
> 
> While I agree that the conversion is an improvement, the logic still
> looks totally bogus.
> 
> I can believe that it might have been a stopgap to fix problems before
> we identified the need for sync_cache() for non-coherent IOMMUs, but
> there's no need I can spot for any WBINVDs on any of these paths.
> 
> I'm fairly sure this should just be dropped, and Xen will get faster as
> a result.

Kevin, thoughts? I have to admit I'm hesitant to remove such code, when
there's no clear indication why it's there. I'm also not sure how much
of a win the dropping would be, considering the places where this
function gets called from.

Jan




* Re: [PATCH 2/4] VT-d / x86: re-arrange cache syncing
  2021-12-01 14:39   ` Andrew Cooper
@ 2021-12-02  9:19     ` Jan Beulich
  2021-12-24  7:24       ` Tian, Kevin
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2021-12-02  9:19 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Paul Durrant, Andrew Cooper, Wei Liu, Roger Pau Monné,
	Kevin Tian, xen-devel

On 01.12.2021 15:39, Andrew Cooper wrote:
> On 01/12/2021 09:40, Jan Beulich wrote:
>> The actual function should always have lived in core x86 code; move it
>> there, replacing get_cache_line_size() by readily available (except very
>> early during boot; see the code comment) data.
>>
>> Drop the respective IOMMU hook, (re)introducing a corresponding boolean
>> instead. Replace one truly and one almost open-coded instance of
>> iommu_sync_cache().
> 
> Coherency (or not) is a per-IOMMU property, not a global platform
> property.  iGPU IOMMUs are very different to those in the uncore, and
> there's no reason to presume that IOMMUs in the PCH would have the same
> properties as those in the uncore.
> 
> Given how expensive sync_cache() is, we cannot afford to be using it for
> coherent IOMMUs in a mixed system.
> 
> This wants to be a boolean in arch_iommu.

That's a valid consideration, but may not be as easy as it may seem on
the surface. Certainly not something I could promise to find time for
soon. And definitely separate from the specific change here.

>> ---
>> Placing the function next to flush_area_local() exposes a curious
>> asymmetry between the SFENCE placements: sync_cache() has it after the
>> flush, while flush_area_local() has it before it. I think the latter one
>> is misplaced.
> 
> Wow this is a mess.
> 
> First and foremost, AMD state that on pre-CLFLUSHOPT parts, CLFLUSH is
> unordered with ~anything (including SFENCE), and needs bounding with
> MFENCE on both sides.  We definitely aren't doing this correctly right now.
> 
> 
> AMD explicitly states that SFENCE drains the store and WC buffers (i.e.
> make stuff instantaneously globally visible).  Intel does not, and
> merely guarantees ordering.
> 
> A leading SFENCE would only make sense if there were WC concerns, but
> both manuals say that the memory type doesn't matter, so I can't see a
> justification for it.
> 
> What does matter, from the IOMMU's point of view, is that the line has
> been written back (or evicted on pre-CLWB parts) before the IOTLB
> invalidation occurs.  The invalidation will be a write to a different
> address, which is why the trailing SFENCE is necessary, as CLFLUSHOPT
> isn't ordered with respect to unaliasing writes.

IOW for the purposes of this change all is correct, and everything else
will require taking care of separately.

> On a more minor note, both Intel and AMD say that CLFLUSH* are permitted
> to target an execute-only code segment, so we need a fix in
> hvmemul_cache_op()'s use of hvmemul_virtual_to_linear(...,
> hvm_access_read, ...) which will currently #GP in this case.

Hmm, this specific case would seem to require simply using hvm_access_none
(like hvmemul_tlb_op() already does), except for CLWB.

But then hvm_vcpu_virtual_to_linear() also doesn't look to handle
hvm_access_insn_fetch correctly for data segments. Changing of which would
in turn require to first audit all use sites, to make sure we don't break
anything.

I'll see about doing both.

>> --- a/xen/arch/x86/flushtlb.c
>> +++ b/xen/arch/x86/flushtlb.c
>> @@ -11,6 +11,7 @@
>>  #include <xen/sched.h>
>>  #include <xen/smp.h>
>>  #include <xen/softirq.h>
>> +#include <asm/cache.h>
>>  #include <asm/flushtlb.h>
>>  #include <asm/invpcid.h>
>>  #include <asm/nops.h>
>> @@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
>>      return flags;
>>  }
>>  
>> +void sync_cache(const void *addr, unsigned int size)
> 
> Can we name this cache_writeback()?  sync is very generic, and it really
> doesn't want confusing with cache_flush().

Oh, sure, can do. As long as you don't insist on also changing
iommu_sync_cache(): While purely mechanical, this would bloat the
patch by quite a bit.

Bottom line: This last item is the only change to the actual patch;
everything else will require further work yielding separate patches.

Jan




* RE: [PATCH 2/4] VT-d / x86: re-arrange cache syncing
  2021-12-02  9:19     ` Jan Beulich
@ 2021-12-24  7:24       ` Tian, Kevin
  0 siblings, 0 replies; 16+ messages in thread
From: Tian, Kevin @ 2021-12-24  7:24 UTC (permalink / raw)
  To: Beulich, Jan, Andrew Cooper
  Cc: Paul Durrant, Cooper, Andrew, Wei Liu, Pau Monné, Roger, xen-devel

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Thursday, December 2, 2021 5:19 PM
> 
> On 01.12.2021 15:39, Andrew Cooper wrote:
> > On 01/12/2021 09:40, Jan Beulich wrote:
> >> The actual function should always have lived in core x86 code; move it
> >> there, replacing get_cache_line_size() by readily available (except very
> >> early during boot; see the code comment) data.
> >>
> >> Drop the respective IOMMU hook, (re)introducing a corresponding boolean
> >> instead. Replace one truly and one almost open-coded instance of
> >> iommu_sync_cache().
> >
> > Coherency (or not) is a per-IOMMU property, not a global platform
> > property.  iGPU IOMMUs are very different to those in the uncore, and
> > there's no reason to presume that IOMMUs in the PCH would have the
> same
> > properties as those in the uncore.
> >
> > Given how expensive sync_cache() is, we cannot afford to be using it for
> > coherent IOMMUs in a mixed system.
> >
> > This wants to be a boolean in arch_iommu.
> 
> That's a valid consideration, but may not be as easy as it may seem on
> the surface. Certainly not something I could promise to find time for
> soon. And definitely separate from the specific change here.

I'm fine with this patch if you prefer a staged approach to improving it.
In any case this patch doesn't make things worse.

> 
> >> ---
> >> Placing the function next to flush_area_local() exposes a curious
> >> asymmetry between the SFENCE placements: sync_cache() has it after the
> >> flush, while flush_area_local() has it before it. I think the latter one
> >> is misplaced.
> >
> > Wow this is a mess.
> >
> > First and foremost, AMD state that on pre-CLFLUSHOPT parts, CLFLUSH is
> > unordered with ~anything (including SFENCE), and needs bounding with
> > MFENCE on both sides.  We definitely aren't doing this correctly right now.
> >
> >
> > AMD explicitly states that SFENCE drains the store and WC buffers (i.e.
> > make stuff instantaneously globally visible).  Intel does not, and
> > merely guarantees ordering.
> >
> > A leading SFENCE would only make sense if there were WC concerns, but
> > both manuals say that the memory type doesn't matter, so I can't see a
> > justification for it.
> >
> > What does matter, from the IOMMU's point of view, is that the line has
> > been written back (or evicted on pre-CLWB parts) before the IOTLB
> > invalidation occurs.  The invalidation will be a write to a different
> > address, which is why the trailing SFENCE is necessary, as CLFLUSHOPT
> > isn't ordered with respect to unaliasing writes.
> 
> IOW for the purposes of this change all is correct, and everything else
> will require taking care of separately.
> 

Same for this part. BTW, Linux does mfence both before and after clflush.

Thanks
Kevin


* RE: [PATCH 3/4] VT-d: replace flush_all_cache()
  2021-12-02  8:47     ` Jan Beulich
@ 2021-12-24  7:28       ` Tian, Kevin
  0 siblings, 0 replies; 16+ messages in thread
From: Tian, Kevin @ 2021-12-24  7:28 UTC (permalink / raw)
  To: Beulich, Jan, Andrew Cooper; +Cc: Paul Durrant, xen-devel

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Thursday, December 2, 2021 4:48 PM
> 
> On 01.12.2021 14:02, Andrew Cooper wrote:
> > On 01/12/2021 09:41, Jan Beulich wrote:
> >> --- a/xen/drivers/passthrough/vtd/iommu.c
> >> +++ b/xen/drivers/passthrough/vtd/iommu.c
> >> @@ -591,7 +591,8 @@ static int __must_check iommu_flush_all(
> >>      bool_t flush_dev_iotlb;
> >>      int rc = 0;
> >>
> >> -    flush_all_cache();
> >> +    flush_local(FLUSH_CACHE);
> >
> > While I agree that the conversion is an improvement, the logic still
> > looks totally bogus.
> >
> > I can believe that it might have been a stopgap to fix problems before
> > we identified the need for sync_cache() for non-coherent IOMMUs, but
> > there's no need I can spot for any WBINVDs on any of these paths.
> >
> > I'm fairly sure this should just be dropped, and Xen will get faster as
> > a result.
> 
> Kevin, thoughts? I have to admit I'm hesitant to remove such code, when
> there's no clear indication why it's there. I'm also not sure how much
> of a win the dropping would be, considering the places where this
> function gets called from.
> 

Me too. Could Andrew elaborate further on the "fairly sure" part?

Thanks
Kevin

