All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-10 10:36   ` Malcolm Crossley
  2016-02-17 20:09   ` Konrad Rzeszutek Wilk
  2016-02-10 10:10 ` [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping Malcolm Crossley
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: keir, ian.campbell, andrew.cooper3, tim, xen-devel, jbeulich,
	Malcolm Crossley

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: jbeulich@suse.com
Cc: andrew.cooper3@citrix.com
Cc: ian.campbell@citrix.com
Cc: keir@xen.org
Cc: tim@xen.org
Cc: xen-devel@lists.xen.org
---
 xen/arch/x86/x86_64/compat/entry.S |  2 ++
 xen/arch/x86/x86_64/entry.S        |  2 ++
 xen/common/Makefile                |  1 +
 xen/common/pv_iommu.c              | 38 ++++++++++++++++++++++++++++++++++++++
 xen/include/public/xen.h           |  1 +
 5 files changed, 44 insertions(+)
 create mode 100644 xen/common/pv_iommu.c

diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 3088aa7..53a1689 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -436,6 +436,7 @@ ENTRY(compat_hypercall_table)
         .quad do_tmem_op
         .quad do_ni_hypercall           /* reserved for XenClient */
         .quad do_xenpmu_op              /* 40 */
+        .quad do_iommu_op
         .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
         .quad compat_ni_hypercall
         .endr
@@ -487,6 +488,7 @@ ENTRY(compat_hypercall_args_table)
         .byte 1 /* do_tmem_op               */
         .byte 0 /* reserved for XenClient   */
         .byte 2 /* do_xenpmu_op             */  /* 40 */
+        .byte 2 /* do_iommu_op              */
         .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
         .byte 0 /* compat_ni_hypercall      */
         .endr
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 94a54aa..fee7191 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -769,6 +769,7 @@ ENTRY(hypercall_table)
         .quad do_tmem_op
         .quad do_ni_hypercall       /* reserved for XenClient */
         .quad do_xenpmu_op          /* 40 */
+        .quad do_iommu_op
         .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
         .quad do_ni_hypercall
         .endr
@@ -820,6 +821,7 @@ ENTRY(hypercall_args_table)
         .byte 1 /* do_tmem_op           */
         .byte 0 /* reserved for XenClient */
         .byte 2 /* do_xenpmu_op         */  /* 40 */
+        .byte 2 /* do_iommu_op          */
         .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 6e82b33..b498589 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -25,6 +25,7 @@ obj-y += notifier.o
 obj-y += page_alloc.o
 obj-$(CONFIG_HAS_PDX) += pdx.o
 obj-y += preempt.o
+obj-y += pv_iommu.o
 obj-y += random.o
 obj-y += rangeset.o
 obj-y += radix-tree.o
diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
new file mode 100644
index 0000000..304fccf
--- /dev/null
+++ b/xen/common/pv_iommu.c
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * common/pv_iommu.c
+ * 
+ * Paravirtualised IOMMU functionality
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/guest_access.h>
+
+#define ret_t long
+
+ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)
+{
+    return -ENOSYS;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 7b629b1..ff50e7a 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -102,6 +102,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
 #define __HYPERVISOR_tmem_op              38
 #define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
 #define __HYPERVISOR_xenpmu_op            40
+#define __HYPERVISOR_iommu_op             41
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
  2016-02-10 10:10 ` [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU Malcolm Crossley
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-17 20:22   ` Konrad Rzeszutek Wilk
  2016-02-10 10:10 ` [RFC PATCH 3/7] VT-d: Add iommu_lookup_page support Malcolm Crossley
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: Malcolm Crossley, jbeulich, xen-devel

If the IOMMU driver does not implement the lookup_page function, then iommu_lookup_page() returns -ENOMEM.

Returns 0 on success and any other value on failure.

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: jbeulich@suse.com
Cc: xen-devel@lists.xen.org
---
 xen/drivers/passthrough/iommu.c | 21 +++++++++++++++++++++
 xen/include/xen/iommu.h         |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 0b2abf4..06f21ee 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -257,6 +257,27 @@ int iommu_unmap_page(struct domain *d, unsigned long gfn)
     return hd->platform_ops->unmap_page(d, gfn);
 }
 
+int iommu_lookup_page(struct domain *d, unsigned long bfn, unsigned long *gfn)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+    /* 
+     * BFN maps 1:1 to GFN when iommu passthrough is enabled or 
+     * when IOMMU shared page tables is in use
+     */
+    if ( iommu_use_hap_pt(d) || (iommu_passthrough && is_hardware_domain(d)) )
+    {
+        *gfn = bfn;
+        return 0;
+    }
+
+    if ( !iommu_enabled || !hd->platform_ops ||
+            !hd->platform_ops->lookup_page )
+        return -ENOMEM;
+
+    return hd->platform_ops->lookup_page(d, bfn, gfn);
+}
+
 static void iommu_free_pagetables(unsigned long unused)
 {
     do {
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 8217cb7..49ca087 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -77,6 +77,7 @@ void iommu_teardown(struct domain *d);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                    unsigned int flags);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
+int iommu_lookup_page(struct domain *d, unsigned long bfn, unsigned long *gfn);
 
 enum iommu_feature
 {
@@ -151,6 +152,7 @@ struct iommu_ops {
     int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
                     unsigned int flags);
     int (*unmap_page)(struct domain *d, unsigned long gfn);
+    int (*lookup_page)(struct domain *d, unsigned long bfn, unsigned long *gfn);
     void (*free_page_table)(struct page_info *);
 #ifdef CONFIG_X86
     void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 3/7] VT-d: Add iommu_lookup_page support
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
  2016-02-10 10:10 ` [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU Malcolm Crossley
  2016-02-10 10:10 ` [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping Malcolm Crossley
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-17 20:32   ` Konrad Rzeszutek Wilk
  2016-02-10 10:10 ` [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops Malcolm Crossley
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: Malcolm Crossley, feng.wu, kevin.tian, xen-devel

Function does not need to handle shared EPT use of IOMMU as core code
already handles this.

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: kevin.tian@intel.com
Cc: feng.wu@intel.com
Cc: xen-devel@lists.xen.org
---
 xen/drivers/passthrough/vtd/iommu.c | 31 +++++++++++++++++++++++++++++++
 xen/drivers/passthrough/vtd/iommu.h |  1 +
 2 files changed, 32 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index ec31c6b..0c79b48 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1754,6 +1754,36 @@ static int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
     return 0;
 }
 
+static int intel_iommu_lookup_page(
+    struct domain *d, unsigned long gfn, unsigned long *mfn)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct dma_pte *page = NULL, *pte = NULL, old;
+    u64 pg_maddr;
+
+    spin_lock(&hd->arch.mapping_lock);
+
+    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
+    if ( pg_maddr == 0 )
+    {
+        spin_unlock(&hd->arch.mapping_lock);
+        return -ENOMEM;
+    }
+    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+    pte = page + (gfn & LEVEL_MASK);
+    old = *pte;
+    if (!dma_pte_present(old)) {
+        unmap_vtd_domain_page(page);
+        spin_unlock(&hd->arch.mapping_lock);
+        return -ENOMEM;
+    }
+    unmap_vtd_domain_page(page);
+    spin_unlock(&hd->arch.mapping_lock);
+
+    *mfn = dma_get_pte_addr(old) >> PAGE_SHIFT_4K;
+    return 0;
+}
+
 void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
                      int order, int present)
 {
@@ -2534,6 +2564,7 @@ const struct iommu_ops intel_iommu_ops = {
     .teardown = iommu_domain_teardown,
     .map_page = intel_iommu_map_page,
     .unmap_page = intel_iommu_unmap_page,
+    .lookup_page = intel_iommu_lookup_page,
     .free_page_table = iommu_free_page_table,
     .reassign_device = reassign_device_ownership,
     .get_device_group_id = intel_iommu_group_id,
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index c55ee08..03583ef 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -275,6 +275,7 @@ struct dma_pte {
 #define dma_pte_addr(p) ((p).val & PADDR_MASK & PAGE_MASK_4K)
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_get_pte_addr(p) (((p).val & PAGE_MASK_4K))
 #define dma_pte_present(p) (((p).val & DMA_PTE_PROT) != 0)
 #define dma_pte_superpage(p) (((p).val & DMA_PTE_SP) != 0)
 
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
                   ` (2 preceding siblings ...)
  2016-02-10 10:10 ` [RFC PATCH 3/7] VT-d: Add iommu_lookup_page support Malcolm Crossley
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-17 21:05   ` Konrad Rzeszutek Wilk
  2016-02-10 10:10 ` [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page Malcolm Crossley
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: keir, andrew.cooper3, tim, xen-devel, jbeulich, Malcolm Crossley

Implement above ops according to PV-IOMMU design draft D.

Currently restricted to hardware domains only due to RFC status.

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: jbeulich@suse.com
Cc: keir@xen.org
Cc: tim@xen.org
Cc: andrew.cooper3@citrix.com
Cc: xen-devel@lists.xen.org
---
 xen/common/pv_iommu.c         | 228 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/public/pv-iommu.h |  69 +++++++++++++
 2 files changed, 296 insertions(+), 1 deletion(-)
 create mode 100644 xen/include/public/pv-iommu.h

diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
index 304fccf..91485f3 100644
--- a/xen/common/pv_iommu.c
+++ b/xen/common/pv_iommu.c
@@ -17,13 +17,239 @@
  * along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <asm/p2m.h>
+#include <asm/event.h>
 #include <xen/guest_access.h>
+#include <public/pv-iommu.h>
 
 #define ret_t long
 
+static int get_paged_frame(unsigned long gfn, unsigned long *frame,
+                           struct page_info **page, int readonly,
+                           struct domain *rd)
+{
+    int rc = 0;
+#if defined(P2M_PAGED_TYPES) || defined(P2M_SHARED_TYPES)
+    p2m_type_t p2mt;
+
+    *page = get_page_from_gfn(rd, gfn, &p2mt,
+                             (readonly) ? P2M_ALLOC : P2M_UNSHARE);
+    if ( !(*page) )
+    {
+        *frame = INVALID_MFN;
+        if ( p2m_is_shared(p2mt) )
+            return -EIO;
+        if ( p2m_is_paging(p2mt) )
+        {
+            p2m_mem_paging_populate(rd, gfn);
+            return -EIO;
+        }
+        return -EIO;
+    }
+    *frame = page_to_mfn(*page);
+#else
+    *frame = gmfn_to_mfn(rd, gfn);
+    *page = mfn_valid(*frame) ? mfn_to_page(*frame) : NULL;
+    if ( (!(page)) || (!get_page*page, rd) )
+    {
+        *frame = INVALID_MFN;
+        *page = NULL;
+        rc = -EIO;
+    }
+#endif
+
+    return rc;
+}
+
+int can_use_iommu_check(struct domain *d)
+{
+    if ( !iommu_enabled || (!is_hardware_domain(d) && !need_iommu(d)) )
+        return 0;
+
+    if ( is_hardware_domain(d) && iommu_passthrough )
+        return 0;
+
+    if ( !domain_hvm_iommu(d)->platform_ops->lookup_page )
+        return 0;
+
+    return 1;
+}
+
+void do_iommu_sub_op(struct pv_iommu_op *op)
+{
+    struct domain *d = current->domain;
+    struct domain *rd = NULL;
+
+    /* Only order 0 pages supported */
+    if ( IOMMU_get_page_order(op->flags) != 0 )
+    {
+        op->status = -ENOSPC;
+        goto finish;
+    }
+
+    switch ( op->subop_id )
+    {
+        case IOMMUOP_query_caps:
+        {
+            op->flags = 0;
+            op->status = 0;
+            if ( can_use_iommu_check(d) )
+                op->flags |= IOMMU_QUERY_map_cap;
+
+            break;
+        }
+        case IOMMUOP_map_page:
+        {
+            unsigned long mfn, tmp;
+            unsigned int flags = 0;
+            struct page_info *page = NULL;
+
+            /* Check if calling domain can create IOMMU mappings */
+            if ( !can_use_iommu_check(d) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Check we are the owner of the page */
+            if ( !is_hardware_domain(d) &&
+                 ( maddr_get_owner(op->u.map_page.gfn) != d ) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Lookup page struct backing gfn */
+            if ( is_hardware_domain(d) &&
+                (op->flags & IOMMU_MAP_OP_no_ref_cnt) )
+            {
+                mfn = op->u.map_page.gfn;
+                page = mfn_to_page(mfn);
+                if (!page)
+                {
+                    op->status = -EPERM;
+                    goto finish;
+                }
+            } else if ( get_paged_frame(op->u.map_page.gfn, &mfn, &page, 0, d) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Check for conflict with existing BFN mappings */
+            if ( !iommu_lookup_page(d, op->u.map_page.bfn, &tmp) )
+            {
+                if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )
+                    put_page(page);
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            if ( op->flags & IOMMU_OP_readable )
+                flags |= IOMMUF_readable;
+
+            if ( op->flags & IOMMU_OP_writeable )
+                flags |= IOMMUF_writable;
+
+            if ( iommu_map_page(d, op->u.map_page.bfn, mfn, flags) )
+            {
+                if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )
+                    put_page(page);
+                op->status = -EIO;
+                goto finish;
+            }
+
+            op->status = 0;
+            break;
+        }
+
+        case IOMMUOP_unmap_page:
+        {
+            unsigned long mfn;
+
+            /* Check if there is a valid BFN mapping for this domain */
+            if ( iommu_lookup_page(d, op->u.unmap_page.bfn, &mfn) )
+            {
+                op->status = -ENOENT;
+                goto finish;
+            }
+
+            if ( iommu_unmap_page(d, op->u.unmap_page.bfn) )
+            {
+                op->status = -EIO;
+                goto finish;
+            }
+
+            op->status = 0;
+            break;
+        }
+
+        default:
+            op->status = -ENODEV;
+            break;
+    }
+
+finish:
+    if ( rd )
+        rcu_unlock_domain(rd);
+
+    return;
+}
+
 ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)
 {
-    return -ENOSYS;
+    ret_t ret = 0;
+    int i;
+    struct pv_iommu_op op;
+    struct domain *d = current->domain;
+
+    if ( !is_hardware_domain(d) )
+        return -ENOSYS;
+
+    if ( (int)count < 0 )
+        return -EINVAL;
+
+    if ( count > 1 )
+        this_cpu(iommu_dont_flush_iotlb) = 1;
+
+    for ( i = 0; i < count; i++ )
+    {
+        if ( i && hypercall_preempt_check() )
+        {
+            ret =  i;
+            goto flush_pages;
+        }
+        if ( unlikely(__copy_from_guest_offset(&op, arg, i, 1)) )
+        {
+            ret = -EFAULT;
+            goto flush_pages;
+        }
+        do_iommu_sub_op(&op);
+        if ( unlikely(__copy_to_guest_offset(arg, i, &op, 1)) )
+        {
+            ret = -EFAULT;
+            goto flush_pages;
+        }
+    }
+
+flush_pages:
+    if ( ret > 0 )
+    {
+        XEN_GUEST_HANDLE_PARAM(pv_iommu_op_t) op =
+            guest_handle_cast(arg, pv_iommu_op_t);
+        ASSERT(ret < count);
+        guest_handle_add_offset(op, i);
+        arg = guest_handle_cast(op, void);
+        ret = hypercall_create_continuation(__HYPERVISOR_iommu_op,
+                                           "hi", arg, count - i);
+    }
+    if ( count > 1 )
+    {
+        this_cpu(iommu_dont_flush_iotlb) = 0;
+        if ( i )
+            iommu_iotlb_flush_all(d);
+    }
+    return ret;
 }
 
 /*
diff --git a/xen/include/public/pv-iommu.h b/xen/include/public/pv-iommu.h
new file mode 100644
index 0000000..0f19f96
--- /dev/null
+++ b/xen/include/public/pv-iommu.h
@@ -0,0 +1,69 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_PV_IOMMU_H__
+#define __XEN_PUBLIC_PV_IOMMU_H__
+
+#include "xen.h"
+
+#define IOMMUOP_query_caps            1
+#define IOMMUOP_map_page              2
+#define IOMMUOP_unmap_page            3
+
+struct pv_iommu_op {
+    uint16_t subop_id;
+
+#define IOMMU_page_order (0xf1 << 10)
+#define IOMMU_get_page_order(flags) ((flags & IOMMU_page_order) >> 10)
+#define IOMMU_QUERY_map_cap (1 << 0)
+#define IOMMU_QUERY_map_all_mfns (1 << 1)
+#define IOMMU_OP_readable (1 << 0)
+#define IOMMU_OP_writeable (1 << 1)
+#define IOMMU_MAP_OP_no_ref_cnt (1 << 2)
+    uint16_t flags;
+    int32_t status;
+
+    union {
+        struct {
+            uint64_t bfn;
+            uint64_t gfn;
+        } map_page;
+
+        struct {
+            uint64_t bfn;
+        } unmap_page;
+    } u;
+};
+
+
+typedef struct pv_iommu_op pv_iommu_op_t;
+DEFINE_XEN_GUEST_HANDLE(pv_iommu_op_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
                   ` (3 preceding siblings ...)
  2016-02-10 10:10 ` [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops Malcolm Crossley
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-24 17:07   ` George Dunlap
  2016-02-10 10:10 ` [RFC PATCH 6/7] common/pv-iommu: Add foreign ops to PV-IOMMU interface Malcolm Crossley
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: keir, george.dunlap, andrew.cooper3, xen-devel, jbeulich,
	Malcolm Crossley

Add a pointer to the page struct which refers to the head of
m2b tracking structure for that page.

Atomically add a PGC bit to the page struct when setting the pointer to
the m2b tracking structure.

Adding elements to the per page m2b tracking structure is done via a
RCU protected linked list.

Callers of add_m2b and del_m2b must hold the page lock for the page.
This prevents the m2b code racing with itself and allows for fine-grained
locking. References to the page are also expected to be taken and
dropped by the callers to the m2b code.

m2b references are forcibly removed as part of domain destroy to ensure malicious
guests do not cause memory to be leaked.

To aid emulators tracking state, with asynchronous domain destruction, an
ioreq is issued to all affected emulators for each m2b mapping being removed.

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: keir@xen.org
Cc: jbeulich@suse.com
Cc: andrew.cooper3@citrix.com
Cc: george.dunlap@eu.citrix.com
Cc: xen-devel@lists.xen.org
---
 xen/arch/x86/domain.c          |  12 ++-
 xen/arch/x86/mm/Makefile       |   1 +
 xen/arch/x86/mm/m2b.c          | 209 +++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c            |  11 +++
 xen/include/asm-x86/domain.h   |   1 +
 xen/include/asm-x86/m2b.h      |  59 ++++++++++++
 xen/include/asm-x86/mm.h       |  12 ++-
 xen/include/public/hvm/ioreq.h |   1 +
 xen/include/xen/sched.h        |   4 +
 9 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/x86/mm/m2b.c
 create mode 100644 xen/include/asm-x86/m2b.h

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9d43f7b..715502e 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -62,6 +62,7 @@
 #include <xen/iommu.h>
 #include <compat/vcpu.h>
 #include <asm/psr.h>
+#include <asm/m2b.h>
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
 DEFINE_PER_CPU(unsigned long, cr4);
@@ -2511,7 +2512,7 @@ int domain_relinquish_resources(struct domain *d)
                 return ret;
         }
 
-        d->arch.relmem = RELMEM_xen;
+        d->arch.relmem = RELMEM_m2b;
 
         spin_lock(&d->page_alloc_lock);
         page_list_splice(&d->arch.relmem_list, &d->page_list);
@@ -2519,6 +2520,15 @@ int domain_relinquish_resources(struct domain *d)
         spin_unlock(&d->page_alloc_lock);
 
         /* Fallthrough. Relinquish every page of memory. */
+
+    case RELMEM_m2b:
+
+        ret = m2b_domain_destroy(d, d->m2b_destroy_mfn);
+        if ( ret )
+            return ret;
+        d->arch.relmem = RELMEM_xen;
+        /* fallthrough */
+
     case RELMEM_xen:
         ret = relinquish_memory(d, &d->xenpage_list, ~0UL);
         if ( ret )
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 9804c3a..84ad8c0 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -9,6 +9,7 @@ obj-y += guest_walk_3.o
 obj-y += guest_walk_4.o
 obj-y += mem_paging.o
 obj-y += mem_sharing.o
+obj-y += m2b.o
 
 guest_walk_%.o: guest_walk.c Makefile
 	$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
diff --git a/xen/arch/x86/mm/m2b.c b/xen/arch/x86/mm/m2b.c
new file mode 100644
index 0000000..078944a
--- /dev/null
+++ b/xen/arch/x86/mm/m2b.c
@@ -0,0 +1,209 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <asm/m2b.h>
+#include <asm/event.h>
+
+struct m2b_head
+{
+    struct page_info *pg;
+    struct list_head head;
+    unsigned int count;
+    struct rcu_head rcu_head;
+};
+
+DEFINE_RCU_READ_LOCK(m2b_rcu);
+
+struct m2b_entry *lookup_m2b_entry(struct page_info *page, struct domain *d,
+                                   ioservid_t ioserver, unsigned long bfn)
+{
+    struct m2b_entry *m2b_e = NULL;
+    struct list_head *entry;
+    domid_t domain = d->domain_id;
+
+    if ( !test_bit(_PGC_foreign_map, &page->count_info) )
+        return NULL;
+
+    rcu_read_lock(&m2b_rcu);
+    list_for_each_rcu(entry, &page->pv_iommu->head)
+    {
+        m2b_e = list_entry(entry, struct m2b_entry, list);
+        if ( m2b_e->domain == domain && m2b_e->ioserver == ioserver &&
+                m2b_e->bfn == bfn )
+                    goto done;
+        else if ( ioserver == IOSERVER_ANY && m2b_e->domain == domain &&
+                  m2b_e->bfn == bfn )
+                    goto done;
+        else if ( bfn == BFN_ANY && m2b_e->domain == domain &&
+                  m2b_e->ioserver == ioserver )
+                    goto done;
+        else if ( bfn == BFN_ANY && ioserver == IOSERVER_ANY &&
+                  m2b_e->domain == domain )
+                    goto done;
+    }
+done:
+    rcu_read_unlock(&m2b_rcu);
+
+    /* Nothing was found */
+    return m2b_e;
+}
+
+void notify_m2b_entries(struct page_info *page)
+{
+    return;
+}
+
+/* Must be called with page_lock held */
+int add_m2b_entry(struct page_info *page, struct domain *d,
+                  ioservid_t ioserver, uint64_t bfn)
+{
+    struct m2b_entry *m2b_e;
+    int head_allocated = 0;
+    domid_t domain = d->domain_id;
+
+    if ( !test_bit(_PGC_foreign_map, &page->count_info) )
+    {
+        page->pv_iommu = xmalloc(struct m2b_head);
+        if ( !page->pv_iommu )
+            return -ENOMEM;
+        head_allocated = 1;
+        INIT_LIST_HEAD(&page->pv_iommu->head);
+        INIT_RCU_HEAD(&page->pv_iommu->rcu_head);
+        set_bit(_PGC_foreign_map, &page->count_info);
+        page->pv_iommu->count = 0;
+    }
+
+    m2b_e = xmalloc(struct m2b_entry);
+    if ( !m2b_e )
+    {
+        if ( head_allocated )
+            xfree(page->pv_iommu);
+
+        return -ENOMEM;
+    }
+
+    m2b_e->domain = domain;
+    m2b_e->ioserver = ioserver;
+    m2b_e->bfn = bfn;
+
+    INIT_LIST_HEAD(&m2b_e->list);
+    INIT_RCU_HEAD(&m2b_e->rcu);
+    list_add_rcu(&m2b_e->list, &page->pv_iommu->head);
+
+    atomic_inc(&d->m2b_count);
+    page->pv_iommu->count++;
+    return 0;
+}
+
+void free_m2b_entry(struct rcu_head *rcu)
+{
+    xfree(container_of(rcu, struct m2b_entry, rcu));
+}
+
+/* Must be called with page_lock held */
+int del_m2b_entry(struct page_info *page, struct domain *d, ioservid_t ioserver,
+                  unsigned long bfn)
+{
+    struct m2b_entry *m2b_e;
+
+    m2b_e = lookup_m2b_entry(page, d, ioserver, bfn);
+    if ( !m2b_e )
+        return -ENOENT;
+
+    list_del_rcu(&m2b_e->list);
+    call_rcu(&m2b_e->rcu, free_m2b_entry);
+    page->pv_iommu->count--;
+    atomic_dec(&d->m2b_count);
+
+    if ( page->pv_iommu->count == 0 )
+    {
+        clear_bit(_PGC_foreign_map, &page->count_info);
+        xfree(page->pv_iommu);
+    }
+    return 0;
+}
+
+
+/* Remove all M2B entries created by the domain being destroyed */
+int m2b_domain_destroy(struct domain *d, unsigned long mfn)
+{
+    struct m2b_entry *m2b_e, *m2b_e_hwdom;
+    struct page_info *page;
+    int locked;
+
+    if ( ! atomic_read(&d->m2b_count) )
+        return 0;
+
+
+    for ( ; mfn < max_page; mfn++ )
+    {
+        /* Check for preemption every 4 MB */
+        if ( mfn % 0x1000 == 0 && mfn != d->m2b_destroy_mfn)
+        {
+            if ( hypercall_preempt_check() )
+            {
+                d->m2b_destroy_mfn = mfn;
+                return -ERESTART;
+            }
+        }
+        if (!mfn_valid(mfn))
+            continue;
+
+        page = mfn_to_page(mfn);
+        if ( !page || (page_get_owner(page) != d) )
+            continue;
+
+        m2b_e = lookup_m2b_entry(page, d,
+                        IOSERVER_ANY, BFN_ANY);
+        m2b_e_hwdom = lookup_m2b_entry(page, hardware_domain,
+                        IOSERVER_ANY, BFN_ANY);
+        if ( !m2b_e && !m2b_e_hwdom )
+            continue;
+
+        locked = page_lock(page);
+
+        /* Remove all M2B entries for this domain */
+        while ( (m2b_e = lookup_m2b_entry(page, d,
+                                          IOSERVER_ANY, BFN_ANY)) )
+        {
+            del_m2b_entry(page, d,
+                          m2b_e->ioserver,
+                          m2b_e->bfn);
+        }
+        /* Remove all M2B entries for hwdom */
+        while ( (m2b_e = lookup_m2b_entry(page, hardware_domain,
+                                          IOSERVER_ANY, BFN_ANY)) )
+        {
+            del_m2b_entry(page, hardware_domain,
+                          m2b_e->ioserver,
+                          m2b_e->bfn);
+            atomic_dec(&d->m2b_count);
+        }
+
+        if ( locked )
+            page_unlock(page);
+        /* Remove this domains reference */
+        put_page(page);
+        if ( ! atomic_read(&d->m2b_count) )
+            break;
+    }
+
+    return 0;
+}
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 9ff1145..df470ba 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -29,6 +29,7 @@
 #include <asm/p2m.h>
 #include <public/memory.h>
 #include <xsm/xsm.h>
+#include <asm/m2b.h>
 
 struct memop_args {
     /* INPUT */
@@ -316,6 +317,16 @@ int guest_remove_page(struct domain *d, unsigned long gmfn)
          test_and_clear_bit(_PGC_allocated, &page->count_info) )
         put_page(page);
 
+#ifdef CONFIG_X86
+    /* Check for M2B mapping */
+    if ( is_hvm_domain(d) &&
+            test_bit(_PGC_foreign_map, &page->count_info) )
+    {
+        /* Notifiy ioserver with M2B mappings */
+        notify_m2b_entries(page);
+    }
+#endif
+
     guest_physmap_remove_page(d, gmfn, mfn, 0);
 
     put_page(page);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 4072e27..6dc6ee5 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -302,6 +302,7 @@ struct arch_domain
     enum {
         RELMEM_not_started,
         RELMEM_shared,
+        RELMEM_m2b,
         RELMEM_xen,
         RELMEM_l4,
         RELMEM_l3,
diff --git a/xen/include/asm-x86/m2b.h b/xen/include/asm-x86/m2b.h
new file mode 100644
index 0000000..a21502c
--- /dev/null
+++ b/xen/include/asm-x86/m2b.h
@@ -0,0 +1,59 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_M2B_H__
+#define __XEN_M2B_H__
+
+#include <xen/sched.h>
+
+struct m2b_entry
+{
+    struct list_head list;
+    domid_t domain;
+    ioservid_t ioserver;
+    uint64_t bfn;
+    struct rcu_head rcu;
+};
+
+int m2b_domain_destroy(struct domain *d, unsigned long mfn);
+
+void notify_m2b_entries(struct page_info *page);
+
+#define BFN_ANY         ~0UL
+#define IOSERVER_ANY    ~0
+
+struct m2b_entry *lookup_m2b_entry(struct page_info *page, struct domain *d,
+                                   ioservid_t ioserver, unsigned long bfn);
+int add_m2b_entry(struct page_info *page, struct domain *d,
+                  ioservid_t ioserver, uint64_t bfn);
+int del_m2b_entry(struct page_info *page, struct domain *d, ioservid_t ioserver,
+                  unsigned long bfn);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index a097382..620ccd5 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -65,6 +65,12 @@ struct page_info
          */
         struct page_sharing_info *sharing;
     };
+    /* For foreign mapped pages, we use a doubly-linked list
+     * of all the {pfn,domain, ioserver} tuples that map this page.
+     * This list is allocated and freed from a page is foreign
+     * mapped/unmapped.
+     */
+    struct pv_iommu_info *pv_iommu;
 
     /* Reference count and various PGC_xxx flags and fields. */
     unsigned long count_info;
@@ -230,8 +236,12 @@ struct page_info
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
 
+/* Page has foreign mappings? */
+#define _PGC_foreign_map  PG_shift(10)
+#define PGC_foreign_map   PG_mask(1, 10)
+
  /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 struct spage_info
diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h
index 2e5809b..c8ecc6e 100644
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -37,6 +37,7 @@
 #define IOREQ_TYPE_PCI_CONFIG   2
 #define IOREQ_TYPE_TIMEOFFSET   7
 #define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
+#define IOREQ_TYPE_INVAL_BFN    9 /* bfn */
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index b47a3fe..3085be2 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -464,6 +464,10 @@ struct domain
     /* vNUMA topology accesses are protected by rwlock. */
     rwlock_t vnuma_rwlock;
     struct vnuma_info *vnuma;
+
+    /* Progress of cleaning m2b for domain destroy */
+    unsigned long m2b_destroy_mfn;
+    atomic_t m2b_count;
 };
 
 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 6/7] common/pv-iommu: Add foreign ops to PV-IOMMU interface
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
                   ` (4 preceding siblings ...)
  2016-02-10 10:10 ` [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page Malcolm Crossley
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-17 21:11   ` Konrad Rzeszutek Wilk
  2016-02-10 10:10 ` [RFC PATCH 7/7] common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign memory Malcolm Crossley
  2016-02-10 10:33 ` [RFC PATCH 0/7] Implement Xen PV-IOMMU interface Malcolm Crossley
  7 siblings, 1 reply; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: keir, ian.campbell, andrew.cooper3, tim, xen-devel, jbeulich,
	Malcolm Crossley, dgdegra

Currently the ops are X86 only due to a dependency on the
X86 only B2M implementation.

Foreign ops conform to draft D of PV-IOMMU design.

XSM control has been implemented to allow full security control of
these privileged operations.

Page references and page locking are taken before using B2M
interface which is mandated by the B2M interface itself.

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: dgdegra@tycho.nsa.gov
Cc: jbeulich@suse.com
Cc: ian.campbell@citrix.com
Cc: keir@xen.org
Cc: tim@xen.org
Cc: andrew.cooper3@citrix.com
Cc: xen-devel@lists.xen.org
---
 xen/common/pv_iommu.c         | 269 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/public/pv-iommu.h |  22 ++++
 xen/include/xsm/dummy.h       |   6 +
 xen/include/xsm/xsm.h         |   6 +
 xen/xsm/dummy.c               |   1 +
 5 files changed, 304 insertions(+)

diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
index 91485f3..bbf1a21 100644
--- a/xen/common/pv_iommu.c
+++ b/xen/common/pv_iommu.c
@@ -21,7 +21,12 @@
 #include <asm/event.h>
 #include <xen/guest_access.h>
 #include <public/pv-iommu.h>
+#include <xsm/xsm.h>
 
+#ifdef CONFIG_X86
+#include <asm/m2b.h>
+#include <asm/setup.h>
+#endif
 #define ret_t long
 
 static int get_paged_frame(unsigned long gfn, unsigned long *frame,
@@ -79,6 +84,7 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
 {
     struct domain *d = current->domain;
     struct domain *rd = NULL;
+    int ret;
 
     /* Only order 0 pages supported */
     if ( IOMMU_get_page_order(op->flags) != 0 )
@@ -183,7 +189,270 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
             op->status = 0;
             break;
         }
+#ifdef CONFIG_X86
+        case IOMMUOP_map_foreign_page:
+        {
+            unsigned long mfn, tmp;
+            unsigned int flags = 0;
+            struct page_info *page = NULL;
+            struct m2b_entry *m2b_e;
+            int locked;
+
+            /* Check if calling domain can create IOMMU mappings */
+            if ( !can_use_iommu_check(d) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            rd = rcu_lock_domain_by_any_id(op->u.map_foreign_page.domid);
+            if ( !rd )
+            {
+                op->status = -ENXIO;
+                goto finish;
+            }
+
+            /* Only HVM domains can have their pages foreign mapped */
+            if ( is_pv_domain(rd) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            if ( d->domain_id == op->u.map_foreign_page.domid ||
+                    op->u.map_foreign_page.domid == DOMID_SELF )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Check for privilege over remote domain*/
+            if ( xsm_iommu_control(XSM_DM_PRIV, rd, op->subop_id) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Lookup page struct backing gfn */
+            if ( get_paged_frame(op->u.map_foreign_page.gfn, &mfn, &page, 0,
+                        rd) )
+            {
+                op->status = -ENXIO;
+                goto finish;
+            }
+            /* Check M2B for existing mapping */
+            m2b_e = lookup_m2b_entry(page, d,
+                        op->u.map_foreign_page.ioserver,
+                        op->u.map_foreign_page.bfn);
+
+            /* M2B already exists for domid, gfn, ioserver combination */
+            if ( m2b_e )
+            {
+                put_page(page);
+                op->status = 0;
+                goto finish;
+            }
+
+            if ( !mfn_valid(mfn) || xen_in_range(mfn) ||
+                 is_xen_heap_page(page)  ||
+                 (page->count_info & PGC_allocated) ||
+                 ( (page->count_info & PGC_count_mask) < 2 ))
+            {
+                put_page(page);
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Check for conflict with existing BFN mapping */
+            if ( !iommu_lookup_page(d, op->u.map_foreign_page.bfn, &tmp) )
+            {
+                put_page(page);
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            if ( op->flags & IOMMU_OP_readable )
+                flags |= IOMMUF_readable;
+
+            if ( op->flags & IOMMU_OP_writeable )
+                flags |= IOMMUF_writable;
+
+            if ( iommu_map_page(d, op->u.map_foreign_page.bfn, mfn, flags) )
+            {
+                put_page(page);
+                op->status = -EIO;
+                goto finish;
+            }
+            /* Add M2B entry */
+            locked = page_lock(page);
+            ret = add_m2b_entry(page, d,
+                        op->u.map_foreign_page.ioserver,
+                        op->u.map_foreign_page.bfn);
+            atomic_inc(&rd->m2b_count);
+            if ( locked )
+                page_unlock(page);
+            if ( ret )
+            {
+                if ( iommu_unmap_page(d, op->u.map_foreign_page.bfn) )
+                    domain_crash(d);
+
+                put_page(page);
+                op->status = -ENOMEM;
+                goto finish;
+            }
+
+            op->status = 0;
+            break;
+        }
+        case IOMMUOP_lookup_foreign_page:
+        {
+            unsigned long mfn;
+            struct page_info *page = NULL;
+            struct m2b_entry *m2b_e;
+            int rc;
+            int locked;
 
+            if ( d->domain_id == op->u.lookup_foreign_page.domid ||
+                 op->u.lookup_foreign_page.domid == DOMID_SELF )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            rd = rcu_lock_domain_by_any_id(op->u.lookup_foreign_page.domid);
+
+            if ( !rd )
+            {
+                op->status = -ENXIO;
+                goto finish;
+            }
+
+            /* Only HVM domains can have their pages foreign mapped */
+            if ( is_pv_domain(rd) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Check for privilege */
+            if ( xsm_iommu_control(XSM_DM_PRIV, rd, op->subop_id) )
+            {
+                op->status = -EPERM;
+                goto finish;
+            }
+
+            /* Lookup page struct backing gfn */
+            if ( (rc = get_paged_frame(op->u.lookup_foreign_page.gfn, &mfn, &page, 0,
+                                 rd)) )
+            {
+                op->status = -ENXIO; // Should this be something else?
+                goto finish;
+            }
+
+            /* Check M2B for existing mapping */
+            m2b_e = lookup_m2b_entry(page, d,
+                                     op->u.lookup_foreign_page.ioserver,
+                                     BFN_ANY);
+
+            /* M2B already exists for domid, gfn, ioserver combination */
+            if ( m2b_e )
+            {
+                put_page(page);
+                op->u.lookup_foreign_page.bfn = m2b_e->bfn;
+                op->status = 0;
+                goto finish;
+            }
+
+            /* Only create BFN mappings for guest mapped memory */
+            if ( !(page->count_info & PGC_allocated) ||
+                 ( (page->count_info & PGC_count_mask) < 2 ))
+            {
+                    put_page(page);
+                    op->status = -EPERM;
+                    goto finish;
+            }
+
+            /* Check if IOMMU is disabled/bypassed */
+            if ( !can_use_iommu_check(d) )
+            {
+                /* Add M2B entry using MFN */
+                locked = page_lock(page);
+                ret = add_m2b_entry(page, d,
+                                    op->u.lookup_foreign_page.ioserver, mfn);
+                atomic_inc(&rd->m2b_count);
+                if ( locked )
+                    page_unlock(page);
+                if ( ret )
+                {
+                   put_page(page);
+                   op->status = -ENOMEM;
+                   goto finish;
+                }
+                op->u.lookup_foreign_page.bfn = mfn;
+            }
+            op->status = 0;
+            break;
+        }
+        case IOMMUOP_unmap_foreign_page:
+        {
+            struct m2b_entry *m2b_e;
+            struct page_info *page;
+            unsigned long mfn;
+            int locked;
+
+
+            if ( !can_use_iommu_check(d) )
+            {
+                page = mfn_to_page(op->u.unmap_foreign_page.bfn);
+            }
+            else
+            {
+                /* Check if there is a valid BFN mapping for this domain */
+                if ( iommu_lookup_page(d, op->u.unmap_foreign_page.bfn, &mfn) )
+                {
+                   op->status = -ENOENT;
+                   goto finish;
+                }
+                /* Use MFN from B2M mapping to lookup page */
+                page = mfn_to_page(mfn);
+            }
+
+            if ( !page )
+            {
+               op->status = -ENOENT;
+               goto finish;
+            }
+
+            /* Try to remove the M2B mapping */
+            locked = page_lock(page);
+            ret = del_m2b_entry(page, d,
+                                op->u.unmap_foreign_page.ioserver,
+                                op->u.unmap_foreign_page.bfn);
+            atomic_dec(&page_get_owner(page)->m2b_count);
+            if ( locked )
+            page_unlock(page);
+            if ( ret )
+            {
+               op->status = -ENOENT;
+               goto finish;
+            }
+
+            if ( can_use_iommu_check(d) )
+            {
+                /* Check if there are any M2B mappings left for this domain */
+                m2b_e = lookup_m2b_entry(page, d,
+                                         IOSERVER_ANY,
+                                         op->u.unmap_foreign_page.bfn);
+
+                /* No M2B left for this bfn so IOMMU unmap it */
+                if ( !m2b_e )
+                {
+                    if ( iommu_unmap_page(d, op->u.map_foreign_page.bfn) )
+                        domain_crash(d);
+                }
+            }
+        }
+#endif
         default:
             op->status = -ENODEV;
             break;
diff --git a/xen/include/public/pv-iommu.h b/xen/include/public/pv-iommu.h
index 0f19f96..366037c 100644
--- a/xen/include/public/pv-iommu.h
+++ b/xen/include/public/pv-iommu.h
@@ -26,6 +26,9 @@
 #define IOMMUOP_query_caps            1
 #define IOMMUOP_map_page              2
 #define IOMMUOP_unmap_page            3
+#define IOMMUOP_map_foreign_page      4
+#define IOMMUOP_lookup_foreign_page   5
+#define IOMMUOP_unmap_foreign_page    6
 
 struct pv_iommu_op {
     uint16_t subop_id;
@@ -49,6 +52,25 @@ struct pv_iommu_op {
         struct {
             uint64_t bfn;
         } unmap_page;
+
+        struct {
+            uint64_t bfn;
+            uint64_t gfn;
+            uint16_t domid;
+            uint16_t ioserver;
+        } map_foreign_page;
+
+        struct {
+            uint64_t bfn;
+            uint64_t gfn;
+            uint16_t domid;
+            uint16_t ioserver;
+        } lookup_foreign_page;
+
+        struct {
+            uint64_t bfn;
+            uint16_t ioserver;
+        } unmap_foreign_page;
     } u;
 };
 
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 1d13826..d0be2fe 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -566,6 +566,12 @@ static XSM_INLINE int xsm_vm_event_control(XSM_DEFAULT_ARG struct domain *d, int
     return xsm_default_action(action, current->domain, d);
 }
 
+static XSM_INLINE int xsm_iommu_control(XSM_DEFAULT_ARG struct domain *d, unsigned long op)
+{
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
+    return xsm_default_action(action, current->domain, d);
+}
+
 #ifdef CONFIG_HAS_MEM_ACCESS
 static XSM_INLINE int xsm_mem_access(XSM_DEFAULT_ARG struct domain *d)
 {
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 3afed70..92e6e33 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -114,6 +114,7 @@ struct xsm_operations {
     int (*iomem_permission) (struct domain *d, uint64_t s, uint64_t e, uint8_t allow);
     int (*iomem_mapping) (struct domain *d, uint64_t s, uint64_t e, uint8_t allow);
     int (*pci_config_permission) (struct domain *d, uint32_t machine_bdf, uint16_t start, uint16_t end, uint8_t access);
+    int (*iommu_control) (struct domain *d, unsigned long op);
 
 #if defined(CONFIG_HAS_PASSTHROUGH) && defined(CONFIG_HAS_PCI)
     int (*get_device_group) (uint32_t machine_bdf);
@@ -605,6 +606,11 @@ static inline int xsm_vm_event_control (xsm_default_t def, struct domain *d, int
     return xsm_ops->vm_event_control(d, mode, op);
 }
 
+static inline int xsm_iommu_control(xsm_default_t def, struct domain *d, unsigned long op)
+{
+    return xsm_ops->iommu_control(d, op);
+}
+
 #ifdef CONFIG_HAS_MEM_ACCESS
 static inline int xsm_mem_access (xsm_default_t def, struct domain *d)
 {
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 0f32636..e18bb3b 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -119,6 +119,7 @@ void xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, hvm_altp2mhvm_op);
 
     set_to_dummy_if_null(ops, do_xsm_op);
+    set_to_dummy_if_null(ops, iommu_control);
 #ifdef CONFIG_COMPAT
     set_to_dummy_if_null(ops, do_compat_op);
 #endif
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 7/7] common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign memory
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
                   ` (5 preceding siblings ...)
  2016-02-10 10:10 ` [RFC PATCH 6/7] common/pv-iommu: Add foreign ops to PV-IOMMU interface Malcolm Crossley
@ 2016-02-10 10:10 ` Malcolm Crossley
  2016-02-10 10:33 ` [RFC PATCH 0/7] Implement Xen PV-IOMMU interface Malcolm Crossley
  7 siblings, 0 replies; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:10 UTC (permalink / raw)
  Cc: keir, george.dunlap, andrew.cooper3, xen-devel, jbeulich,
	Malcolm Crossley

If the hardware domain is in relaxed IOMMU mode, then allow the
hardware domain to pre IOMMU map host memory so that foreign IOMMU maps
of guest memory are not necessary and only foreign lookups are required.

The security model is not weakened because the hardware domain in relaxed
IOMMU mode already had an IOMMU map of the entire system RAM.

A special hardware_domain premap array is used to track the hardware
domain's current IOMMU mappings.

This optimisation allows for emulators to not suffer an IOMMU flush delay
for each batch of foreign page(s) they attempt to map/unmap.

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
--
Cc: keir@xen.org
Cc: jbeulich@suse.com
Cc: andrew.cooper3@citrix.com
Cc: george.dunlap@eu.citrix.com
Cc: xen-devel@lists.xen.org
---
 xen/arch/x86/mm/m2b.c         |   2 +
 xen/common/pv_iommu.c         | 100 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/m2b.h     |   6 +++
 xen/include/public/pv-iommu.h |   2 +
 4 files changed, 110 insertions(+)

diff --git a/xen/arch/x86/mm/m2b.c b/xen/arch/x86/mm/m2b.c
index 078944a..59d4d02 100644
--- a/xen/arch/x86/mm/m2b.c
+++ b/xen/arch/x86/mm/m2b.c
@@ -31,6 +31,8 @@ struct m2b_head
 
 DEFINE_RCU_READ_LOCK(m2b_rcu);
 
+uint64_t **hwdom_premap_m2b;
+
 struct m2b_entry *lookup_m2b_entry(struct page_info *page, struct domain *d,
                                    ioservid_t ioserver, unsigned long bfn)
 {
diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
index bbf1a21..944d723 100644
--- a/xen/common/pv_iommu.c
+++ b/xen/common/pv_iommu.c
@@ -102,6 +102,13 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
             if ( can_use_iommu_check(d) )
                 op->flags |= IOMMU_QUERY_map_cap;
 
+            if ( is_hardware_domain(d) && !d->need_iommu )
+            {
+                op->flags |= IOMMU_QUERY_map_all_mfns;
+                hwdom_premap_m2b = xzalloc_array(unsigned long *,
+                                        ((sizeof(unsigned long) * max_pdx)/
+                                        PAGE_SIZE) + 1);
+            }
             break;
         }
         case IOMMUOP_map_page:
@@ -165,12 +172,40 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
                 goto finish;
             }
 
+            /* Add to M2B with wildcard ioserver entry */
+            if ( is_hardware_domain(d) && (op->flags & IOMMU_MAP_OP_add_m2b ))
+            {
+                if ( !hwdom_premap_m2b )
+                {
+                    op->status = -EPERM;
+                    goto finish;
+                }
+                /* Check if tracking page is allocated */
+                if ( !hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)] )
+                {
+                    hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)] =
+                            alloc_xenheap_page();
+                    if ( !hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)] )
+                    {
+                        op->status = -ENOMEM;
+                        goto finish;
+                    }
+                    clear_page(hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)]);
+                } else if ( read_atomic(&PREMAP_M2B(mfn)) )
+                {
+                    op->status = -EPERM;
+                    goto finish;
+                }
+
+                write_atomic(&PREMAP_M2B(mfn), op->u.map_page.bfn);
+            }
             op->status = 0;
             break;
         }
 
         case IOMMUOP_unmap_page:
         {
+            struct page_info *page;
             unsigned long mfn;
 
             /* Check if there is a valid BFN mapping for this domain */
@@ -186,6 +221,22 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
                 goto finish;
             }
 
+            /* Use MFN from B2M mapping to lookup page */
+            page = mfn_to_page(mfn);
+
+            /* Remove wildcard M2B mapping */
+            if ( is_hardware_domain(d) &&
+                (op->flags & IOMMU_UNMAP_OP_remove_m2b) &&
+                hwdom_premap_m2b &&
+                hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)] &&
+                read_atomic(&PREMAP_M2B(mfn)) )
+            {
+                /* Remove M2B entry */
+                write_atomic(&PREMAP_M2B(mfn), 0);
+            }
+            if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )
+                put_page(page);
+
             op->status = 0;
             break;
         }
@@ -390,6 +441,44 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
                 }
                 op->u.lookup_foreign_page.bfn = mfn;
             }
+            else if ( is_hardware_domain(d) )
+            {
+                uint64_t bfn;
+                /* Check if a premap already exists */
+                if ( !hwdom_premap_m2b ||
+                     !hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)])
+                {
+                    put_page(page);
+                    op->status = -ENOENT;
+                    goto finish;
+                }
+
+                bfn = read_atomic(&PREMAP_M2B(mfn));
+
+                /* Check if BFN is non zero */
+                if ( !bfn )
+                {
+                    put_page(page);
+                    op->status = -ENOENT;
+                    goto finish;
+                }
+
+                locked = page_lock(page);
+                ret = add_m2b_entry(page, d,
+                                    op->u.lookup_foreign_page.ioserver,
+                                    bfn);
+                atomic_inc(&rd->m2b_count);
+                if ( locked )
+                    page_unlock(page);
+
+                if ( ret )
+                {
+                   put_page(page);
+                   op->status = -ENOMEM;
+                   goto finish;
+                }
+                op->u.lookup_foreign_page.bfn = bfn;
+            }
             op->status = 0;
             break;
         }
@@ -437,6 +526,12 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
                goto finish;
             }
 
+            /* Check if hwdom IOMMU premap is present */
+            if ( is_hardware_domain(d) && can_use_iommu_check(d) &&
+                 hwdom_premap_m2b && hwdom_premap_m2b[PREMAP_M2B_PAGE(mfn)] &&
+                 (read_atomic(&PREMAP_M2B(mfn)) == op->u.unmap_foreign_page.bfn) )
+                goto foreign_unmap_done;
+
             if ( can_use_iommu_check(d) )
             {
                 /* Check if there are any M2B mappings left for this domain */
@@ -451,6 +546,11 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
                         domain_crash(d);
                 }
             }
+foreign_unmap_done:
+            /* Remove the reference to the page */
+            put_page(page);
+            op->status = 0;
+        break;
         }
 #endif
         default:
diff --git a/xen/include/asm-x86/m2b.h b/xen/include/asm-x86/m2b.h
index a21502c..4d06b08 100644
--- a/xen/include/asm-x86/m2b.h
+++ b/xen/include/asm-x86/m2b.h
@@ -46,6 +46,12 @@ int add_m2b_entry(struct page_info *page, struct domain *d,
 int del_m2b_entry(struct page_info *page, struct domain *d, ioservid_t ioserver,
                   unsigned long bfn);
 
+extern uint64_t **hwdom_premap_m2b;
+
+#define PREMAP_M2B_PAGE(x) ( pfn_to_pdx(x) / (PAGE_SIZE/sizeof(uint64_t) ) )
+#define PREMAP_M2B_IDX(x) ( pfn_to_pdx(x) % (PAGE_SIZE/sizeof(uint64_t) ) )
+#define PREMAP_M2B(x) hwdom_premap_m2b[PREMAP_M2B_PAGE(x)][PREMAP_M2B_IDX(x)]
+
 #endif
 
 /*
diff --git a/xen/include/public/pv-iommu.h b/xen/include/public/pv-iommu.h
index 366037c..4505f3e 100644
--- a/xen/include/public/pv-iommu.h
+++ b/xen/include/public/pv-iommu.h
@@ -40,6 +40,8 @@ struct pv_iommu_op {
 #define IOMMU_OP_readable (1 << 0)
 #define IOMMU_OP_writeable (1 << 1)
 #define IOMMU_MAP_OP_no_ref_cnt (1 << 2)
+#define IOMMU_MAP_OP_add_m2b (1 << 3)
+#define IOMMU_UNMAP_OP_remove_m2b (1 << 0)
     uint16_t flags;
     int32_t status;
 
-- 
1.7.12.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 0/7] Implement Xen PV-IOMMU interface
       [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
                   ` (6 preceding siblings ...)
  2016-02-10 10:10 ` [RFC PATCH 7/7] common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign memory Malcolm Crossley
@ 2016-02-10 10:33 ` Malcolm Crossley
  2016-02-17 20:12   ` Konrad Rzeszutek Wilk
                     ` (2 more replies)
  7 siblings, 3 replies; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:33 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, keir, ian.campbell, george.dunlap, andrew.cooper3,
	tim, jbeulich, feng.wu, dgdegra

This RFC series implements the PV-IOMMU interface according to the PV-IOMMU
design draft D.

The PV-IOMMU interface is currently restricted to being used by the hardware
domain only as non hardware domain callers have not been fully tested yet.

Significant effort was put into implementing a m2b tracking structure without
increasing the size of struct page but no union was found that could be safely
used in all cases when a page is allocated to an HVM domain. Comments and
feedback on the m2b design are most welcome.

The hardware domain specific IOMMU pre map mechanism was implemented in order
to keep performance parity with current out of tree mechanisms to obtain BFNs
for foreign guest owned memory. The pre map mechanism is not a weakening of
the current security model of Xen and is only allowed when the hardware domain
is allowed relaxed IOMMU mappings.


Malcolm Crossley (7):
  common/pv-iommu: Add stub hypercall for PV-IOMMU
  iommu: add iommu_lookup_page to lookup guest gfn for a particular
    IOMMU mapping
  VT-d: Add iommu_lookup_page support
  common/pv-iommu: Add query, map and unmap ops
  x86/m2b: Add a tracking structure for mfn to bfn mappings per page
  common/pv-iommu: Add foreign ops to PV-IOMMU interface
  common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign
    memory

 xen/arch/x86/domain.c               |  12 +-
 xen/arch/x86/mm/Makefile            |   1 +
 xen/arch/x86/mm/m2b.c               | 211 ++++++++++++
 xen/arch/x86/x86_64/compat/entry.S  |   2 +
 xen/arch/x86/x86_64/entry.S         |   2 +
 xen/common/Makefile                 |   1 +
 xen/common/memory.c                 |  11 +
 xen/common/pv_iommu.c               | 633 ++++++++++++++++++++++++++++++++++++
 xen/drivers/passthrough/iommu.c     |  21 ++
 xen/drivers/passthrough/vtd/iommu.c |  31 ++
 xen/drivers/passthrough/vtd/iommu.h |   1 +
 xen/include/asm-x86/domain.h        |   1 +
 xen/include/asm-x86/m2b.h           |  65 ++++
 xen/include/asm-x86/mm.h            |  12 +-
 xen/include/public/hvm/ioreq.h      |   1 +
 xen/include/public/pv-iommu.h       |  93 ++++++
 xen/include/public/xen.h            |   1 +
 xen/include/xen/iommu.h             |   2 +
 xen/include/xen/sched.h             |   4 +
 xen/include/xsm/dummy.h             |   6 +
 xen/include/xsm/xsm.h               |   6 +
 xen/xsm/dummy.c                     |   1 +
 22 files changed, 1116 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/x86/mm/m2b.c
 create mode 100644 xen/common/pv_iommu.c
 create mode 100644 xen/include/asm-x86/m2b.h
 create mode 100644 xen/include/public/pv-iommu.h

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU
  2016-02-10 10:10 ` [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU Malcolm Crossley
@ 2016-02-10 10:36   ` Malcolm Crossley
  2016-02-17 20:09   ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 21+ messages in thread
From: Malcolm Crossley @ 2016-02-10 10:36 UTC (permalink / raw)
  Cc: keir, xen-devel

I forgot to CC xen-devel for the cover letter for this series.
You can access the cover letter in the xen-devel archives here:
http://lists.xen.org/archives/html/xen-devel/2016-02/msg01441.html

On 10/02/16 10:10, Malcolm Crossley wrote:
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> --
> Cc: jbeulich@suse.com
> Cc: andrew.cooper3@citrix.com
> Cc: ian.campbell@citrix.com
> Cc: keir@xen.org
> Cc: tim@xen.org
> Cc: xen-devel@lists.xen.org
> ---
>  xen/arch/x86/x86_64/compat/entry.S |  2 ++
>  xen/arch/x86/x86_64/entry.S        |  2 ++
>  xen/common/Makefile                |  1 +
>  xen/common/pv_iommu.c              | 38 ++++++++++++++++++++++++++++++++++++++
>  xen/include/public/xen.h           |  1 +
>  5 files changed, 44 insertions(+)
>  create mode 100644 xen/common/pv_iommu.c
> 
> diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
> index 3088aa7..53a1689 100644
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -436,6 +436,7 @@ ENTRY(compat_hypercall_table)
>          .quad do_tmem_op
>          .quad do_ni_hypercall           /* reserved for XenClient */
>          .quad do_xenpmu_op              /* 40 */
> +        .quad do_iommu_op
>          .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
>          .quad compat_ni_hypercall
>          .endr
> @@ -487,6 +488,7 @@ ENTRY(compat_hypercall_args_table)
>          .byte 1 /* do_tmem_op               */
>          .byte 0 /* reserved for XenClient   */
>          .byte 2 /* do_xenpmu_op             */  /* 40 */
> +        .byte 2 /* do_iommu_op              */
>          .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
>          .byte 0 /* compat_ni_hypercall      */
>          .endr
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index 94a54aa..fee7191 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -769,6 +769,7 @@ ENTRY(hypercall_table)
>          .quad do_tmem_op
>          .quad do_ni_hypercall       /* reserved for XenClient */
>          .quad do_xenpmu_op          /* 40 */
> +        .quad do_iommu_op
>          .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
>          .quad do_ni_hypercall
>          .endr
> @@ -820,6 +821,7 @@ ENTRY(hypercall_args_table)
>          .byte 1 /* do_tmem_op           */
>          .byte 0 /* reserved for XenClient */
>          .byte 2 /* do_xenpmu_op         */  /* 40 */
> +        .byte 2 /* do_iommu_op          */
>          .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
>          .byte 0 /* do_ni_hypercall      */
>          .endr
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index 6e82b33..b498589 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -25,6 +25,7 @@ obj-y += notifier.o
>  obj-y += page_alloc.o
>  obj-$(CONFIG_HAS_PDX) += pdx.o
>  obj-y += preempt.o
> +obj-y += pv_iommu.o
>  obj-y += random.o
>  obj-y += rangeset.o
>  obj-y += radix-tree.o
> diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
> new file mode 100644
> index 0000000..304fccf
> --- /dev/null
> +++ b/xen/common/pv_iommu.c
> @@ -0,0 +1,38 @@
> +/******************************************************************************
> + * common/pv_iommu.c
> + * 
> + * Paravirtualised IOMMU functionality
> + * 
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + * 
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + * 
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/guest_access.h>
> +
> +#define ret_t long
> +
> +ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)
> +{
> +    return -ENOSYS;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> +
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 7b629b1..ff50e7a 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -102,6 +102,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>  #define __HYPERVISOR_tmem_op              38
>  #define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
>  #define __HYPERVISOR_xenpmu_op            40
> +#define __HYPERVISOR_iommu_op             41
>  
>  /* Architecture-specific hypercall definitions. */
>  #define __HYPERVISOR_arch_0               48
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU
  2016-02-10 10:10 ` [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU Malcolm Crossley
  2016-02-10 10:36   ` Malcolm Crossley
@ 2016-02-17 20:09   ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 20:09 UTC (permalink / raw)
  To: Malcolm Crossley
  Cc: keir, ian.campbell, andrew.cooper3, tim, xen-devel, jbeulich

On Wed, Feb 10, 2016 at 10:10:29AM +0000, Malcolm Crossley wrote:
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> --
> Cc: jbeulich@suse.com
> Cc: andrew.cooper3@citrix.com
> Cc: ian.campbell@citrix.com
> Cc: keir@xen.org
> Cc: tim@xen.org
> Cc: xen-devel@lists.xen.org
> ---
>  xen/arch/x86/x86_64/compat/entry.S |  2 ++
>  xen/arch/x86/x86_64/entry.S        |  2 ++
>  xen/common/Makefile                |  1 +
>  xen/common/pv_iommu.c              | 38 ++++++++++++++++++++++++++++++++++++++
>  xen/include/public/xen.h           |  1 +
>  5 files changed, 44 insertions(+)
>  create mode 100644 xen/common/pv_iommu.c
> 
> diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
> index 3088aa7..53a1689 100644
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -436,6 +436,7 @@ ENTRY(compat_hypercall_table)
>          .quad do_tmem_op
>          .quad do_ni_hypercall           /* reserved for XenClient */
>          .quad do_xenpmu_op              /* 40 */
> +        .quad do_iommu_op
>          .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
>          .quad compat_ni_hypercall
>          .endr
> @@ -487,6 +488,7 @@ ENTRY(compat_hypercall_args_table)
>          .byte 1 /* do_tmem_op               */
>          .byte 0 /* reserved for XenClient   */
>          .byte 2 /* do_xenpmu_op             */  /* 40 */
> +        .byte 2 /* do_iommu_op              */
>          .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
>          .byte 0 /* compat_ni_hypercall      */
>          .endr
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index 94a54aa..fee7191 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -769,6 +769,7 @@ ENTRY(hypercall_table)
>          .quad do_tmem_op
>          .quad do_ni_hypercall       /* reserved for XenClient */
>          .quad do_xenpmu_op          /* 40 */
> +        .quad do_iommu_op
>          .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
>          .quad do_ni_hypercall
>          .endr
> @@ -820,6 +821,7 @@ ENTRY(hypercall_args_table)
>          .byte 1 /* do_tmem_op           */
>          .byte 0 /* reserved for XenClient */
>          .byte 2 /* do_xenpmu_op         */  /* 40 */
> +        .byte 2 /* do_iommu_op          */
>          .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
>          .byte 0 /* do_ni_hypercall      */
>          .endr
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index 6e82b33..b498589 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -25,6 +25,7 @@ obj-y += notifier.o
>  obj-y += page_alloc.o
>  obj-$(CONFIG_HAS_PDX) += pdx.o
>  obj-y += preempt.o
> +obj-y += pv_iommu.o

Perhaps have an Kconfig entry for it?

Also you seem to be missing ARM code?

>  obj-y += random.o
>  obj-y += rangeset.o
>  obj-y += radix-tree.o
> diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
> new file mode 100644
> index 0000000..304fccf
> --- /dev/null
> +++ b/xen/common/pv_iommu.c
> @@ -0,0 +1,38 @@
> +/******************************************************************************
> + * common/pv_iommu.c
> + * 
> + * Paravirtualised IOMMU functionality
> + * 
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + * 
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + * 
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/guest_access.h>
> +
> +#define ret_t long

? What is wrong with just using 'long'?

> +
> +ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)
> +{
> +    return -ENOSYS;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> +
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 7b629b1..ff50e7a 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -102,6 +102,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>  #define __HYPERVISOR_tmem_op              38
>  #define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
>  #define __HYPERVISOR_xenpmu_op            40
> +#define __HYPERVISOR_iommu_op             41

I would think there would be an pv_iommu.h header file as well?
>  
>  /* Architecture-specific hypercall definitions. */
>  #define __HYPERVISOR_arch_0               48
> -- 
> 1.7.12.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 0/7] Implement Xen PV-IOMMU interface
  2016-02-10 10:33 ` [RFC PATCH 0/7] Implement Xen PV-IOMMU interface Malcolm Crossley
@ 2016-02-17 20:12   ` Konrad Rzeszutek Wilk
  2016-02-24 18:08   ` George Dunlap
  2016-11-22 16:21   ` Martin Cerveny
  2 siblings, 0 replies; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 20:12 UTC (permalink / raw)
  To: Malcolm Crossley
  Cc: kevin.tian, keir, ian.campbell, george.dunlap, andrew.cooper3,
	tim, xen-devel, jbeulich, feng.wu, dgdegra

On Wed, Feb 10, 2016 at 10:33:24AM +0000, Malcolm Crossley wrote:
> This RFC series implements the PV-IOMMU interface according to the PV-IOMMU
> design draft D.
> 
> The PV-IOMMU interface is currently restricted to being used by the hardware
> domain only as non hardware domain callers have not been fully tested yet.
> 
> Significant effort was put into implementing a m2b tracking structure without
> increasing the size of struct page but no union was found that could be safely
> used in all cases when a page is allocated to an HVM domain. Comments and
> feedback on the m2b design are most welcome.
> 
> The hardware domain specific IOMMU pre map mechanism was implemented in order
> to keep performance parity with current out of tree mechanisms to obtain BFNs
> for foreign guest owned memory. The pre map mechanism is not a weakening of
> the current security model of Xen and is only allowed when the hardware domain
> is allowed relaxed IOMMU mappings.

I would recommend you add to this patchset:

- docs/misc/pv-iommu.txt describing the design of this.
- Add yourself to the maintainers file for this code: xen/common/pv_iommu? perhaps?
- Compile this under ARM as well - and figure out what is missing there?
- Add an KConfig entry so folks have the option of not compiling the Xen
  hypervisor with this?
- Have these patches in a git tree for easier testing.

> 
> 
> Malcolm Crossley (7):
>   common/pv-iommu: Add stub hypercall for PV-IOMMU
>   iommu: add iommu_lookup_page to lookup guest gfn for a particular
>     IOMMU mapping
>   VT-d: Add iommu_lookup_page support
>   common/pv-iommu: Add query, map and unmap ops
>   x86/m2b: Add a tracking structure for mfn to bfn mappings per page
>   common/pv-iommu: Add foreign ops to PV-IOMMU interface
>   common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign
>     memory
> 
>  xen/arch/x86/domain.c               |  12 +-
>  xen/arch/x86/mm/Makefile            |   1 +
>  xen/arch/x86/mm/m2b.c               | 211 ++++++++++++
>  xen/arch/x86/x86_64/compat/entry.S  |   2 +
>  xen/arch/x86/x86_64/entry.S         |   2 +
>  xen/common/Makefile                 |   1 +
>  xen/common/memory.c                 |  11 +
>  xen/common/pv_iommu.c               | 633 ++++++++++++++++++++++++++++++++++++
>  xen/drivers/passthrough/iommu.c     |  21 ++
>  xen/drivers/passthrough/vtd/iommu.c |  31 ++
>  xen/drivers/passthrough/vtd/iommu.h |   1 +
>  xen/include/asm-x86/domain.h        |   1 +
>  xen/include/asm-x86/m2b.h           |  65 ++++
>  xen/include/asm-x86/mm.h            |  12 +-
>  xen/include/public/hvm/ioreq.h      |   1 +
>  xen/include/public/pv-iommu.h       |  93 ++++++
>  xen/include/public/xen.h            |   1 +
>  xen/include/xen/iommu.h             |   2 +
>  xen/include/xen/sched.h             |   4 +
>  xen/include/xsm/dummy.h             |   6 +
>  xen/include/xsm/xsm.h               |   6 +
>  xen/xsm/dummy.c                     |   1 +
>  22 files changed, 1116 insertions(+), 2 deletions(-)
>  create mode 100644 xen/arch/x86/mm/m2b.c
>  create mode 100644 xen/common/pv_iommu.c
>  create mode 100644 xen/include/asm-x86/m2b.h
>  create mode 100644 xen/include/public/pv-iommu.h
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping
  2016-02-10 10:10 ` [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping Malcolm Crossley
@ 2016-02-17 20:22   ` Konrad Rzeszutek Wilk
  2016-02-24 15:08     ` George Dunlap
  0 siblings, 1 reply; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 20:22 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: jbeulich, xen-devel

On Wed, Feb 10, 2016 at 10:10:30AM +0000, Malcolm Crossley wrote:
> If IOMMU driver does not implement lookup_page function then it returns -ENOMEM.

That is a very odd return code. Could you explain why -ENOMEM?

> 
> Returns 0 on success and any other value on failure.
> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> --
> Cc: jbeulich@suse.com
> Cc: xen-devel@lists.xen.org
> ---
>  xen/drivers/passthrough/iommu.c | 21 +++++++++++++++++++++
>  xen/include/xen/iommu.h         |  2 ++
>  2 files changed, 23 insertions(+)
> 
> diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
> index 0b2abf4..06f21ee 100644
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -257,6 +257,27 @@ int iommu_unmap_page(struct domain *d, unsigned long gfn)
>      return hd->platform_ops->unmap_page(d, gfn);
>  }
>  
> +int iommu_lookup_page(struct domain *d, unsigned long bfn, unsigned long *gfn)
> +{
> +    struct hvm_iommu *hd = domain_hvm_iommu(d);
> +
> +    /* 
> +     * BFN maps 1:1 to GFN when iommu passthrough is enabled or 
> +     * when IOMMU shared page tables is in use

Missing full stop.

> +     */
> +    if ( iommu_use_hap_pt(d) || (iommu_passthrough && is_hardware_domain(d)) )

The comment is not in sync with the code. But what I am curious - if
the dom0 is PVH and the shared EPT is disabled, what path should we take?

I would think we would need to skip this and go to the ->lookup_page?

> +    {
> +        *gfn = bfn;
> +        return 0;
> +    }
> +
> +    if ( !iommu_enabled || !hd->platform_ops ||
> +            !hd->platform_ops->lookup_page )
> +        return -ENOMEM;

-ENOMEM ? ENXIO ? Or maybe -ENOSYS for the case when hd->platform_ops
is not set nor ->lookup_page?

> +
> +    return hd->platform_ops->lookup_page(d, bfn, gfn);
> +}
> +
>  static void iommu_free_pagetables(unsigned long unused)
>  {
>      do {
> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
> index 8217cb7..49ca087 100644
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -77,6 +77,7 @@ void iommu_teardown(struct domain *d);
>  int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
>                     unsigned int flags);
>  int iommu_unmap_page(struct domain *d, unsigned long gfn);
> +int iommu_lookup_page(struct domain *d, unsigned long bfn, unsigned long *gfn);
>  
>  enum iommu_feature
>  {
> @@ -151,6 +152,7 @@ struct iommu_ops {
>      int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
>                      unsigned int flags);
>      int (*unmap_page)(struct domain *d, unsigned long gfn);
> +    int (*lookup_page)(struct domain *d, unsigned long bfn, unsigned long *gfn);
>      void (*free_page_table)(struct page_info *);
>  #ifdef CONFIG_X86
>      void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
> -- 
> 1.7.12.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 3/7] VT-d: Add iommu_lookup_page support
  2016-02-10 10:10 ` [RFC PATCH 3/7] VT-d: Add iommu_lookup_page support Malcolm Crossley
@ 2016-02-17 20:32   ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 20:32 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: kevin.tian, feng.wu, xen-devel

On Wed, Feb 10, 2016 at 10:10:31AM +0000, Malcolm Crossley wrote:
> Function does not need to handle shared EPT use of IOMMU as core code
> already handles this.

Could you mention which part of 'core code' handles this?

Also you may want to say this code can only deal with 4K pages but
not with larger ones.

> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> --
> Cc: kevin.tian@intel.com
> Cc: feng.wu@intel.com
> Cc: xen-devel@lists.xen.org
> ---
>  xen/drivers/passthrough/vtd/iommu.c | 31 +++++++++++++++++++++++++++++++
>  xen/drivers/passthrough/vtd/iommu.h |  1 +
>  2 files changed, 32 insertions(+)
> 
> diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
> index ec31c6b..0c79b48 100644
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -1754,6 +1754,36 @@ static int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
>      return 0;
>  }
>  
> +static int intel_iommu_lookup_page(
> +    struct domain *d, unsigned long gfn, unsigned long *mfn)
> +{
> +    struct hvm_iommu *hd = domain_hvm_iommu(d);
> +    struct dma_pte *page = NULL, *pte = NULL, old;
> +    u64 pg_maddr;
> +
> +    spin_lock(&hd->arch.mapping_lock);
> +
> +    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
> +    if ( pg_maddr == 0 )
> +    {
> +        spin_unlock(&hd->arch.mapping_lock);
> +        return -ENOMEM;
> +    }
> +    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
> +    pte = page + (gfn & LEVEL_MASK);
> +    old = *pte;
> +    if (!dma_pte_present(old)) {
> +        unmap_vtd_domain_page(page);
> +        spin_unlock(&hd->arch.mapping_lock);
> +        return -ENOMEM;
> +    }
> +    unmap_vtd_domain_page(page);
> +    spin_unlock(&hd->arch.mapping_lock);

All this code looks close to lookup routine in dma_pte_clear_one and
intel_iommu_map_page.

Could you move most of this lookup code in a static function that your
code and the other ones could call?

> +
> +    *mfn = dma_get_pte_addr(old) >> PAGE_SHIFT_4K;
> +    return 0;
> +}
> +
>  void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
>                       int order, int present)
>  {
> @@ -2534,6 +2564,7 @@ const struct iommu_ops intel_iommu_ops = {
>      .teardown = iommu_domain_teardown,
>      .map_page = intel_iommu_map_page,
>      .unmap_page = intel_iommu_unmap_page,
> +    .lookup_page = intel_iommu_lookup_page,
>      .free_page_table = iommu_free_page_table,
>      .reassign_device = reassign_device_ownership,
>      .get_device_group_id = intel_iommu_group_id,
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index c55ee08..03583ef 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -275,6 +275,7 @@ struct dma_pte {
>  #define dma_pte_addr(p) ((p).val & PADDR_MASK & PAGE_MASK_4K)
>  #define dma_set_pte_addr(p, addr) do {\
>              (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
> +#define dma_get_pte_addr(p) (((p).val & PAGE_MASK_4K))
>  #define dma_pte_present(p) (((p).val & DMA_PTE_PROT) != 0)
>  #define dma_pte_superpage(p) (((p).val & DMA_PTE_SP) != 0)
>  
> -- 
> 1.7.12.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops
  2016-02-10 10:10 ` [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops Malcolm Crossley
@ 2016-02-17 21:05   ` Konrad Rzeszutek Wilk
  2016-02-17 21:07     ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 21:05 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: andrew.cooper3, xen-devel, keir, jbeulich, tim

On Wed, Feb 10, 2016 at 10:10:32AM +0000, Malcolm Crossley wrote:
> Implement above ops according to PV-IOMMU design draft D.

.. which would be great if they were part of this patch series
and you could just: in docs/misc/blah.

> 
> Currently restricted to hardware domains only due to RFC status.

Um, .. that is not reassuring. Perhaps the design document (which
would be in the patchset) would iterate the rest of TODOs?

> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> --
> Cc: jbeulich@suse.com
> Cc: keir@xen.org
> Cc: tim@xen.org
> Cc: andrew.cooper3@citrix.com
> Cc: xen-devel@lists.xen.org
> ---
>  xen/common/pv_iommu.c         | 228 +++++++++++++++++++++++++++++++++++++++++-
>  xen/include/public/pv-iommu.h |  69 +++++++++++++
>  2 files changed, 296 insertions(+), 1 deletion(-)
>  create mode 100644 xen/include/public/pv-iommu.h
> 
> diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
> index 304fccf..91485f3 100644
> --- a/xen/common/pv_iommu.c
> +++ b/xen/common/pv_iommu.c
> @@ -17,13 +17,239 @@
>   * along with this program; If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> +#include <asm/p2m.h>
> +#include <asm/event.h>

Can those be sorted?

>  #include <xen/guest_access.h>
> +#include <public/pv-iommu.h>
>  
>  #define ret_t long
>  
> +static int get_paged_frame(unsigned long gfn, unsigned long *frame,
> +                           struct page_info **page, int readonly,
> +                           struct domain *rd)
> +{
> +    int rc = 0;
> +#if defined(P2M_PAGED_TYPES) || defined(P2M_SHARED_TYPES)
> +    p2m_type_t p2mt;
> +
> +    *page = get_page_from_gfn(rd, gfn, &p2mt,
> +                             (readonly) ? P2M_ALLOC : P2M_UNSHARE);
> +    if ( !(*page) )
> +    {
> +        *frame = INVALID_MFN;
> +        if ( p2m_is_shared(p2mt) )
> +            return -EIO;
> +        if ( p2m_is_paging(p2mt) )
> +        {
> +            p2m_mem_paging_populate(rd, gfn);
> +            return -EIO;
> +        }
> +        return -EIO;
> +    }
> +    *frame = page_to_mfn(*page);
> +#else
> +    *frame = gmfn_to_mfn(rd, gfn);
> +    *page = mfn_valid(*frame) ? mfn_to_page(*frame) : NULL;
> +    if ( (!(page)) || (!get_page*page, rd) )
> +    {
> +        *frame = INVALID_MFN;
> +        *page = NULL;
> +        rc = -EIO;
> +    }
> +#endif
> +
> +    return rc;
> +}
> +
> +int can_use_iommu_check(struct domain *d)
> +{
> +    if ( !iommu_enabled || (!is_hardware_domain(d) && !need_iommu(d)) )
> +        return 0;
> +
> +    if ( is_hardware_domain(d) && iommu_passthrough )
> +        return 0;
> +

What if a PV guest calls this hypercall? Won't it crash on a platform_ops dereference?

> +    if ( !domain_hvm_iommu(d)->platform_ops->lookup_page )
> +        return 0;
> +
> +    return 1;
> +}
> +
> +void do_iommu_sub_op(struct pv_iommu_op *op)
> +{
> +    struct domain *d = current->domain;
> +    struct domain *rd = NULL;
> +
> +    /* Only order 0 pages supported */

Missing full stop.

> +    if ( IOMMU_get_page_order(op->flags) != 0 )
> +    {
> +        op->status = -ENOSPC;

Not ENOSYS? or EINVAL?

> +        goto finish;
> +    }
> +
> +    switch ( op->subop_id )
> +    {
> +        case IOMMUOP_query_caps:
> +        {
> +            op->flags = 0;
> +            op->status = 0;
> +            if ( can_use_iommu_check(d) )
> +                op->flags |= IOMMU_QUERY_map_cap;

s/|=/=/ ?

> +
> +            break;
> +        }
> +        case IOMMUOP_map_page:
> +        {
> +            unsigned long mfn, tmp;

mfn_t pls.

> +            unsigned int flags = 0;

Not uint16_t ?
[edit: ah, this is for  iommu_map_page. You may want to mention that]
> +            struct page_info *page = NULL;
> +
> +            /* Check if calling domain can create IOMMU mappings */

Missing full stop.

> +            if ( !can_use_iommu_check(d) )
> +            {
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            /* Check we are the owner of the page */

Full stop missing.

> +            if ( !is_hardware_domain(d) &&
> +                 ( maddr_get_owner(op->u.map_page.gfn) != d ) )
> +            {
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            /* Lookup page struct backing gfn */

Stop full missing.

> +            if ( is_hardware_domain(d) &&
> +                (op->flags & IOMMU_MAP_OP_no_ref_cnt) )
> +            {
> +                mfn = op->u.map_page.gfn;
> +                page = mfn_to_page(mfn);
> +                if (!page)

Wrong syntax. if ( !page )

> +                {
> +                    op->status = -EPERM;
> +                    goto finish;
> +                }
> +            } else if ( get_paged_frame(op->u.map_page.gfn, &mfn, &page, 0, d) )

I think the syntax wants the 'else if' on its own line.

You may want to put a comment around the 0 value.

> +            {
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            /* Check for conflict with existing BFN mappings */

Stop missing full.

> +            if ( !iommu_lookup_page(d, op->u.map_page.bfn, &tmp) )
> +            {
> +                if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )

This check is bit weak. The guest (not hardware domain) could set this
flag for fun and the code above would be OK with it.

As your check for IOMMU_MAP_OP_no_ref_cnt just tries to do
a get_paged_frame.

You may just want to have some sanity checking on the flags at the start
of this code?

> +                    put_page(page);
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            if ( op->flags & IOMMU_OP_readable )
> +                flags |= IOMMUF_readable;
> +
> +            if ( op->flags & IOMMU_OP_writeable )
> +                flags |= IOMMUF_writable;
> +

What if neither option is set? Should we disallow that?

> +            if ( iommu_map_page(d, op->u.map_page.bfn, mfn, flags) )
> +            {
> +                if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )
> +                    put_page(page);

Again, the non hardware domain could set this.
> +                op->status = -EIO;
> +                goto finish;
> +            }
> +
> +            op->status = 0;
> +            break;
> +        }
> +
> +        case IOMMUOP_unmap_page:
> +        {
> +            unsigned long mfn;

mfn_t.
> +
> +            /* Check if there is a valid BFN mapping for this domain */

Full .. you know what I am going to say.

> +            if ( iommu_lookup_page(d, op->u.unmap_page.bfn, &mfn) )

Wait a minute. Just like that? Don't you want to do some sanity checks?

What if the guest decided for fun to do:
for ( long i = -1; i ; i--)
	hypercall(IOMMUOP_unmap_page, i);

It would naturally fail but we would be taking the hd->arch.mapping_lock
quite a lot! Perhaps do the check (similar to how you have in the mapping
code?)

Maybe even move the checks in their own function?

> +            {
> +                op->status = -ENOENT;
> +                goto finish;
> +            }
> +
> +            if ( iommu_unmap_page(d, op->u.unmap_page.bfn) )
> +            {
> +                op->status = -EIO;

Why not propagate its error values?
> +                goto finish;
> +            }
> +
> +            op->status = 0;
> +            break;
> +        }
> +
> +        default:
> +            op->status = -ENODEV;
> +            break;
> +    }
> +
> +finish:
> +    if ( rd )
> +        rcu_unlock_domain(rd);
> +
> +    return;
> +}
> +
>  ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)

Shouldn't this be changed to be pv_iommu_op_t? instead of void?


>  {
> -    return -ENOSYS;
> +    ret_t ret = 0;
> +    int i;

unsigned int ?
> +    struct pv_iommu_op op;
> +    struct domain *d = current->domain;
> +
> +    if ( !is_hardware_domain(d) )
> +        return -ENOSYS;

-EPERM ?

> +
> +    if ( (int)count < 0 )

Um. If you are doing this - why don't you just make the parameter be 'int'
instead of unsigned int ?

> +        return -EINVAL;
> +
> +    if ( count > 1 )
> +        this_cpu(iommu_dont_flush_iotlb) = 1;

What if the hypercall is  IOMMUOP_query_caps and count is zero (as it
seems it ought to be OK - you just query for the caps). Or what if it is
IOMMUOP_query_caps  and the count is 31415 ?

Perhaps you should do some sanity checks here for some of the
ops?

> +
> +    for ( i = 0; i < count; i++ )

Can you keep the signedness of count and i the same, please?

> +    {
> +        if ( i && hypercall_preempt_check() )
> +        {
> +            ret =  i;

Oh, that should definitly be mentioned in the header file.
[edit: I see now that you do hypercall preemption]

> +            goto flush_pages;
> +        }
> +        if ( unlikely(__copy_from_guest_offset(&op, arg, i, 1)) )
> +        {
> +            ret = -EFAULT;
> +            goto flush_pages;
> +        }
> +        do_iommu_sub_op(&op);
> +        if ( unlikely(__copy_to_guest_offset(arg, i, &op, 1)) )
> +        {
> +            ret = -EFAULT;
> +            goto flush_pages;
> +        }
> +    }
> +
> +flush_pages:
> +    if ( ret > 0 )
> +    {
> +        XEN_GUEST_HANDLE_PARAM(pv_iommu_op_t) op =
> +            guest_handle_cast(arg, pv_iommu_op_t);
> +        ASSERT(ret < count);
> +        guest_handle_add_offset(op, i);
> +        arg = guest_handle_cast(op, void);
> +        ret = hypercall_create_continuation(__HYPERVISOR_iommu_op,
> +                                           "hi", arg, count - i);
> +    }
> +    if ( count > 1 )
> +    {
> +        this_cpu(iommu_dont_flush_iotlb) = 0;
> +        if ( i )
> +            iommu_iotlb_flush_all(d);
> +    }
> +    return ret;
>  }
>  
>  /*
> diff --git a/xen/include/public/pv-iommu.h b/xen/include/public/pv-iommu.h
> new file mode 100644
> index 0000000..0f19f96
> --- /dev/null
> +++ b/xen/include/public/pv-iommu.h
> @@ -0,0 +1,69 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef __XEN_PUBLIC_PV_IOMMU_H__
> +#define __XEN_PUBLIC_PV_IOMMU_H__
> +
> +#include "xen.h"
> +
> +#define IOMMUOP_query_caps            1
> +#define IOMMUOP_map_page              2
> +#define IOMMUOP_unmap_page            3

Also you are missing explanation of what those hypercalls do. You can copy-n-paste
from the design doc.

Perhaps IOMMU_OP_... ?
> +
> +struct pv_iommu_op {
> +    uint16_t subop_id;

Why subop? Isn't it 'op'? To match with IOMMUOP?

> +
> +#define IOMMU_page_order (0xf1 << 10)
> +#define IOMMU_get_page_order(flags) ((flags & IOMMU_page_order) >> 10)
> +#define IOMMU_QUERY_map_cap (1 << 0)
> +#define IOMMU_QUERY_map_all_mfns (1 << 1)
> +#define IOMMU_OP_readable (1 << 0)
> +#define IOMMU_OP_writeable (1 << 1)
> +#define IOMMU_MAP_OP_no_ref_cnt (1 << 2)

Something is off with your tabs. Could you make these be on the same column?

And since these are flags perhaps IOMMU_F_.. ?

Also the hypercall accepts two arguments. So what is the 'unsigned int size'
for ?

That should be mentioned here.

> +    uint16_t flags;
> +    int32_t status;
> +
> +    union {
> +        struct {
> +            uint64_t bfn;
> +            uint64_t gfn;
> +        } map_page;
> +
> +        struct {
> +            uint64_t bfn;
> +        } unmap_page;
> +    } u;
> +};
> +
> +
> +typedef struct pv_iommu_op pv_iommu_op_t;
> +DEFINE_XEN_GUEST_HANDLE(pv_iommu_op_t);
> +
> +#endif
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> -- 
> 1.7.12.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops
  2016-02-17 21:05   ` Konrad Rzeszutek Wilk
@ 2016-02-17 21:07     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 21:07 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: andrew.cooper3, xen-devel, keir, jbeulich, tim

> >  ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)
> 
> Shouldn't this be changed to be pv_iommu_op_t? instead of void?
> 
> 
> >  {
> > -    return -ENOSYS;
> > +    ret_t ret = 0;
> > +    int i;
> 
> unsigned int ?
> > +    struct pv_iommu_op op;
> > +    struct domain *d = current->domain;
> > +
> > +    if ( !is_hardware_domain(d) )
> > +        return -ENOSYS;
> 
> -EPERM ?
> 
> > +


Also you are missing the XSM checks which should be here.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 6/7] common/pv-iommu: Add foreign ops to PV-IOMMU interface
  2016-02-10 10:10 ` [RFC PATCH 6/7] common/pv-iommu: Add foreign ops to PV-IOMMU interface Malcolm Crossley
@ 2016-02-17 21:11   ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 21+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-02-17 21:11 UTC (permalink / raw)
  To: Malcolm Crossley
  Cc: keir, ian.campbell, andrew.cooper3, tim, xen-devel, jbeulich, dgdegra

On Wed, Feb 10, 2016 at 10:10:34AM +0000, Malcolm Crossley wrote:
> Currently the ops are X86 only due to a dependency on the
> X86 only B2M implementation.
> 
> Foreign ops conform to draft D of PV-IOMMU design.
> 
> XSM control has been implemented to allow full security control of
> these privileged operations.

But you missed the declaration of them in access_vectors, the
default policy in xen.te and also the implementation in the hooks.c

And that should all be part of the code that adds the new hypercall.

> 
> Page references and page locking are taken before using B2M
> interface which is mandated by the B2M interface itself.
> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> --
> Cc: dgdegra@tycho.nsa.gov
> Cc: jbeulich@suse.com
> Cc: ian.campbell@citrix.com
> Cc: keir@xen.org
> Cc: tim@xen.org
> Cc: andrew.cooper3@citrix.com
> Cc: xen-devel@lists.xen.org
> ---
>  xen/common/pv_iommu.c         | 269 ++++++++++++++++++++++++++++++++++++++++++
>  xen/include/public/pv-iommu.h |  22 ++++
>  xen/include/xsm/dummy.h       |   6 +
>  xen/include/xsm/xsm.h         |   6 +
>  xen/xsm/dummy.c               |   1 +
>  5 files changed, 304 insertions(+)
> 
> diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
> index 91485f3..bbf1a21 100644
> --- a/xen/common/pv_iommu.c
> +++ b/xen/common/pv_iommu.c
> @@ -21,7 +21,12 @@
>  #include <asm/event.h>
>  #include <xen/guest_access.h>
>  #include <public/pv-iommu.h>
> +#include <xsm/xsm.h>
>  
> +#ifdef CONFIG_X86
> +#include <asm/m2b.h>
> +#include <asm/setup.h>
> +#endif
>  #define ret_t long
>  
>  static int get_paged_frame(unsigned long gfn, unsigned long *frame,
> @@ -79,6 +84,7 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
>  {
>      struct domain *d = current->domain;
>      struct domain *rd = NULL;
> +    int ret;
>  
>      /* Only order 0 pages supported */
>      if ( IOMMU_get_page_order(op->flags) != 0 )
> @@ -183,7 +189,270 @@ void do_iommu_sub_op(struct pv_iommu_op *op)
>              op->status = 0;
>              break;
>          }
> +#ifdef CONFIG_X86

Why not move all of this in arch/x86/pv_iommu.c code that would
implement per-arch code?

> +        case IOMMUOP_map_foreign_page:

Didn't look at the rest of the code - will when you repost.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping
  2016-02-17 20:22   ` Konrad Rzeszutek Wilk
@ 2016-02-24 15:08     ` George Dunlap
  2016-02-24 15:10       ` Andrew Cooper
  0 siblings, 1 reply; 21+ messages in thread
From: George Dunlap @ 2016-02-24 15:08 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Malcolm Crossley, Jan Beulich, xen-devel

On Wed, Feb 17, 2016 at 8:22 PM, Konrad Rzeszutek Wilk
<konrad.wilk@oracle.com> wrote:
>> +    {
>> +        *gfn = bfn;
>> +        return 0;
>> +    }
>> +
>> +    if ( !iommu_enabled || !hd->platform_ops ||
>> +            !hd->platform_ops->lookup_page )
>> +        return -ENOMEM;
>
> -ENOMEM ? ENXIO ? Or maybe -ENOSYS for the case when hd->platform_ops
> is not set nor ->lookup_page?

I think ENOTSUPP, perhaps?  ENOMEM means a memory allocation failed
(which it hasn't); ENOSYS means this is an invalid system call (which
it isn't).  ESRCH could be close too, but it seems like that would be
more appropriate when you try to lookup a non-existent BFN.

 -George

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping
  2016-02-24 15:08     ` George Dunlap
@ 2016-02-24 15:10       ` Andrew Cooper
  0 siblings, 0 replies; 21+ messages in thread
From: Andrew Cooper @ 2016-02-24 15:10 UTC (permalink / raw)
  To: George Dunlap, Konrad Rzeszutek Wilk
  Cc: Malcolm Crossley, Jan Beulich, xen-devel

On 24/02/16 15:08, George Dunlap wrote:
> On Wed, Feb 17, 2016 at 8:22 PM, Konrad Rzeszutek Wilk
> <konrad.wilk@oracle.com> wrote:
>>> +    {
>>> +        *gfn = bfn;
>>> +        return 0;
>>> +    }
>>> +
>>> +    if ( !iommu_enabled || !hd->platform_ops ||
>>> +            !hd->platform_ops->lookup_page )
>>> +        return -ENOMEM;
>> -ENOMEM ? ENXIO ? Or maybe -ENOSYS for the case when hd->platform_ops
>> is not set nor ->lookup_page?
> I think ENOTSUPP, perhaps?  ENOMEM means a memory allocation failed
> (which it hasn't); ENOSYS means this is an invalid system call (which
> it isn't).  ESRCH could be close too, but it seems like that would be
> more appropriate when you try to lookup a non-existent BFN.

Xen (fairly) consistently uses ESRCH for "domain not found".  I would
recommend -ENOTSUPP here.

~Andrew

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page
  2016-02-10 10:10 ` [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page Malcolm Crossley
@ 2016-02-24 17:07   ` George Dunlap
  0 siblings, 0 replies; 21+ messages in thread
From: George Dunlap @ 2016-02-24 17:07 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: Andrew Cooper, Keir Fraser, Jan Beulich, xen-devel

On Wed, Feb 10, 2016 at 10:10 AM, Malcolm Crossley
<malcolm.crossley@citrix.com> wrote:
> Add a pointer to the page struct which refers to the head of
> m2b tracking structure for that page.
>
> Atomically add a PGC bit to the page struct when setting the pointer to
> the m2b tracking structure.
>
> Adding elements to the per page m2b tracking structure is done via a
> RCU protected linked list.
>
> Callers of add_m2b and del_m2b must hold the page lock for the page.
> This prevents the m2b code racing with itself and allows for
> fine-grained locking. References to the page are also expected to be taken and
> dropped by the callers to the m2b code.
>
> m2b references are forcibly removed as part of domain destroy to ensure malicious
> guests do not cause memory

Editing mistake?

[snip]

> +struct m2b_entry *lookup_m2b_entry(struct page_info *page, struct domain *d,
> +                                   ioservid_t ioserver, unsigned long bfn)
> +{
> +    struct m2b_entry *m2b_e = NULL;
> +    struct list_head *entry;
> +    domid_t domain = d->domain_id;
> +
> +    if ( !test_bit(_PGC_foreign_map, &page->count_info) )
> +        return NULL;
> +
> +    rcu_read_lock(&m2b_rcu);
> +    list_for_each_rcu(entry, &page->pv_iommu->head)
> +    {
> +        m2b_e = list_entry(entry, struct m2b_entry, list);
> +        if ( m2b_e->domain == domain && m2b_e->ioserver == ioserver &&
> +                m2b_e->bfn == bfn )
> +                    goto done;
> +        else if ( ioserver == IOSERVER_ANY && m2b_e->domain == domain &&
> +                  m2b_e->bfn == bfn )
> +                    goto done;
> +        else if ( bfn == BFN_ANY && m2b_e->domain == domain &&
> +                  m2b_e->ioserver == ioserver )
> +                    goto done;
> +        else if ( bfn == BFN_ANY && ioserver == IOSERVER_ANY &&
> +                  m2b_e->domain == domain )
> +                    goto done;
> +    }

Just a minor note while I'm thinking about it -- since every single
one of these have "m2b_e->domain == domain", wouldn't it be easier to
read if you just had "if (m2b_e->domain != domain) continue" first,
and then the series of if statements without mentioning the domain?

 -George

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 0/7] Implement Xen PV-IOMMU interface
  2016-02-10 10:33 ` [RFC PATCH 0/7] Implement Xen PV-IOMMU interface Malcolm Crossley
  2016-02-17 20:12   ` Konrad Rzeszutek Wilk
@ 2016-02-24 18:08   ` George Dunlap
  2016-11-22 16:21   ` Martin Cerveny
  2 siblings, 0 replies; 21+ messages in thread
From: George Dunlap @ 2016-02-24 18:08 UTC (permalink / raw)
  To: Malcolm Crossley
  Cc: Tian, Kevin, Keir Fraser, Ian Campbell, Andrew Cooper,
	Tim Deegan, xen-devel, Jan Beulich, feng.wu, Daniel De Graaf

On Wed, Feb 10, 2016 at 10:33 AM, Malcolm Crossley
<malcolm.crossley@citrix.com> wrote:
> This RFC series implements the PV-IOMMU interface according to the PV-IOMMU
> design draft D.
>
> The PV-IOMMU interface is currently restricted to being used by the hardware
> domain only as non hardware domain callers have not been fully tested yet.
>
> Significant effort was put into implementing a m2b tracking structure without
> increasing the size of struct page but no union was found that could be safely
> used in all cases when a page is allocated to an HVM domain. Comments and
> feedback on the m2b design are most welcome.
>
> The hardware domain specific IOMMU pre map mechanism was implemented in order
> to keep performance parity with current out of tree mechanisms to obtain BFNs
> for foreign guest owned memory. The pre map mechanism is not a weakening of
> the current security model of Xen and is only allowed when the hardware domain
> is allowed relaxed IOMMU mappings.
>
>
> Malcolm Crossley (7):
>   common/pv-iommu: Add stub hypercall for PV-IOMMU
>   iommu: add iommu_lookup_page to lookup guest gfn for a particular
>     IOMMU mapping
>   VT-d: Add iommu_lookup_page support
>   common/pv-iommu: Add query, map and unmap ops
>   x86/m2b: Add a tracking structure for mfn to bfn mappings per page
>   common/pv-iommu: Add foreign ops to PV-IOMMU interface
>   common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign
>     memory
>
>  xen/arch/x86/domain.c               |  12 +-
>  xen/arch/x86/mm/Makefile            |   1 +
>  xen/arch/x86/mm/m2b.c               | 211 ++++++++++++

My first question from the RFC perspective is whether mm/ is the right
place for this file to go.  It doesn't seem to have any interaction
with any of the other p2m functionality, nor does it seem to resemble
the functionality much (using linked lists from the page struct rather
than the pagetable structure).  I don't mind it going under mm/ per se
but it seems like it might fit logically better under
xen/drivers/passthrough.

Definitely agree that the design doc needs to be checked into the tree
as a part of this series.

The other minor question is why you have the patches broken down quite
the way you do -- what's the point of adding a stub hypercall that
doesn't do anything, and adding a callback framework that doesn't have
any users?  Why not just squash patches 1 and 4 together, and patches
2 and 3 together?

The design doc is well-written and thorough, thanks. :-)

 -George

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 0/7] Implement Xen PV-IOMMU interface
  2016-02-10 10:33 ` [RFC PATCH 0/7] Implement Xen PV-IOMMU interface Malcolm Crossley
  2016-02-17 20:12   ` Konrad Rzeszutek Wilk
  2016-02-24 18:08   ` George Dunlap
@ 2016-11-22 16:21   ` Martin Cerveny
  2 siblings, 0 replies; 21+ messages in thread
From: Martin Cerveny @ 2016-11-22 16:21 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: xen-devel

Hello.

Any update on this ?

Thanks, Martin Cerveny

On Wed, 10 Feb 2016, Malcolm Crossley wrote:

> This RFC series implements the PV-IOMMU interface according to the PV-IOMMU
> design draft D.
>
> The PV-IOMMU interface is currently restricted to being used by the hardware
> domain only as non hardware domain callers have not been fully tested yet.
>
> Significant effort was put into implementing a m2b tracking structure without
> increasing the size of struct page but no union was found that could be safely
> used in all cases when a page is allocated to an HVM domain. Comments and
> feedback on the m2b design are most welcome.
>
> The hardware domain specific IOMMU pre map mechanism was implemented in order
> to keep performance parity with current out of tree mechanisms to obtain BFNs
> for foreign guest owned memory. The pre map mechanism is not a weakening of
> the current security model of Xen and is only allowed when the hardware domain
> is allowed relaxed IOMMU mappings.
>
>
> Malcolm Crossley (7):
>  common/pv-iommu: Add stub hypercall for PV-IOMMU
>  iommu: add iommu_lookup_page to lookup guest gfn for a particular
>    IOMMU mapping
>  VT-d: Add iommu_lookup_page support
>  common/pv-iommu: Add query, map and unmap ops
>  x86/m2b: Add a tracking structure for mfn to bfn mappings per page
>  common/pv-iommu: Add foreign ops to PV-IOMMU interface
>  common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign
>    memory
>
> xen/arch/x86/domain.c               |  12 +-
> xen/arch/x86/mm/Makefile            |   1 +
> xen/arch/x86/mm/m2b.c               | 211 ++++++++++++
> xen/arch/x86/x86_64/compat/entry.S  |   2 +
> xen/arch/x86/x86_64/entry.S         |   2 +
> xen/common/Makefile                 |   1 +
> xen/common/memory.c                 |  11 +
> xen/common/pv_iommu.c               | 633 ++++++++++++++++++++++++++++++++++++
> xen/drivers/passthrough/iommu.c     |  21 ++
> xen/drivers/passthrough/vtd/iommu.c |  31 ++
> xen/drivers/passthrough/vtd/iommu.h |   1 +
> xen/include/asm-x86/domain.h        |   1 +
> xen/include/asm-x86/m2b.h           |  65 ++++
> xen/include/asm-x86/mm.h            |  12 +-
> xen/include/public/hvm/ioreq.h      |   1 +
> xen/include/public/pv-iommu.h       |  93 ++++++
> xen/include/public/xen.h            |   1 +
> xen/include/xen/iommu.h             |   2 +
> xen/include/xen/sched.h             |   4 +
> xen/include/xsm/dummy.h             |   6 +
> xen/include/xsm/xsm.h               |   6 +
> xen/xsm/dummy.c                     |   1 +
> 22 files changed, 1116 insertions(+), 2 deletions(-)
> create mode 100644 xen/arch/x86/mm/m2b.c
> create mode 100644 xen/common/pv_iommu.c
> create mode 100644 xen/include/asm-x86/m2b.h
> create mode 100644 xen/include/public/pv-iommu.h
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-11-22 16:21 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1455099035-17649-1-git-send-email-malcolm.crossley@citrix.com>
2016-02-10 10:10 ` [RFC PATCH 1/7] common/pv-iommu: Add stub hypercall for PV-IOMMU Malcolm Crossley
2016-02-10 10:36   ` Malcolm Crossley
2016-02-17 20:09   ` Konrad Rzeszutek Wilk
2016-02-10 10:10 ` [RFC PATCH 2/7] iommu: add iommu_lookup_page to lookup guest gfn for a particular IOMMU mapping Malcolm Crossley
2016-02-17 20:22   ` Konrad Rzeszutek Wilk
2016-02-24 15:08     ` George Dunlap
2016-02-24 15:10       ` Andrew Cooper
2016-02-10 10:10 ` [RFC PATCH 3/7] VT-d: Add iommu_lookup_page support Malcolm Crossley
2016-02-17 20:32   ` Konrad Rzeszutek Wilk
2016-02-10 10:10 ` [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops Malcolm Crossley
2016-02-17 21:05   ` Konrad Rzeszutek Wilk
2016-02-17 21:07     ` Konrad Rzeszutek Wilk
2016-02-10 10:10 ` [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page Malcolm Crossley
2016-02-24 17:07   ` George Dunlap
2016-02-10 10:10 ` [RFC PATCH 6/7] common/pv-iommu: Add foreign ops to PV-IOMMU interface Malcolm Crossley
2016-02-17 21:11   ` Konrad Rzeszutek Wilk
2016-02-10 10:10 ` [RFC PATCH 7/7] common/pv-iommu: Allow hardware_domain to pre IOMMU map foreign memory Malcolm Crossley
2016-02-10 10:33 ` [RFC PATCH 0/7] Implement Xen PV-IOMMU interface Malcolm Crossley
2016-02-17 20:12   ` Konrad Rzeszutek Wilk
2016-02-24 18:08   ` George Dunlap
2016-11-22 16:21   ` Martin Cerveny

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.