* [V9 PATCH 0/8] pvh dom0 patches....
@ 2014-04-16  0:12 Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 1/8] pvh dom0: move some pv specific code to static functions Mukesh Rathor
                   ` (8 more replies)
  0 siblings, 9 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

Hi,

Please find V9 of the pvh dom0 patches. Based on commit ac0f56a, with the
p2m patches (rename and errno propagation) underneath.

 git tree: git://oss.oracle.com/git/mrathor/xen.git  branch: dom0pvh-v9


Changes from V8:

 patch 2: 
    - pvh_add_mem_mapping: set_mmio_p2m_entry now returns rc
    - add descriptive commit log comments
    - rename hap_set_pvh_alloc_for_dom0 to hap_set_alloc_for_pvh_dom0 and
      move hap alloc calculation to domain_build.c
    - redo comment: "pvh: we temporarily"
    - add comment on top of pvh_fixup_page_tables_for_hap

 patch 3: 
    - trivial merge conflict resolution

 patch 4: 
    - new patch to add some checks in relevant p2m paths for foreign types

 patch 5 (previously patch 4): 
    - move xsm_map_gmfn_foreign near the non-arch declarations and definitions

 patch 6 (previously patch 5): 
    - set_foreign_p2m_entry now returns proper -errno rc in p2m_add_foreign
    - change get_pg_owner to allow foreign mappings for pvh
    - get_pg_owner made public and called from p2m_add_foreign. 
    - created p2m_is_any_ram

 patch 7:
    - redo the small patch because the vioapic variable name actually should
      be used for the vioapic ptr.

thanks,
Mukesh


* [V9 PATCH 1/8] pvh dom0: move some pv specific code to static functions
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 2/8] pvh dom0: construct_dom0 changes Mukesh Rathor
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

In this preparatory patch, some pv-specific code is
carved out into static functions. No functional change.
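
The refactoring extracts two helpers (signatures as in the diff below) and
replaces the corresponding inline blocks in construct_dom0() with calls that
can be guarded by is_pv_domain(d):

    static __init void mark_pv_pt_pages_rdonly(struct domain *d,
                                               l4_pgentry_t *l4start,
                                               unsigned long vpt_start,
                                               unsigned long nr_pt_pages);

    static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn,
                                        unsigned long v_start, unsigned long v_end,
                                        unsigned long vphysmap_start,
                                        unsigned long vphysmap_end,
                                        unsigned long nr_pages);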

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/domain_build.c | 347 ++++++++++++++++++++++++--------------------
 1 file changed, 188 insertions(+), 159 deletions(-)

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 84ce392..f3ae9ad 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -307,6 +307,187 @@ static void __init process_dom0_ioports_disable(void)
     }
 }
 
+static __init void mark_pv_pt_pages_rdonly(struct domain *d,
+                                           l4_pgentry_t *l4start,
+                                           unsigned long vpt_start,
+                                           unsigned long nr_pt_pages)
+{
+    unsigned long count;
+    struct page_info *page;
+    l4_pgentry_t *pl4e;
+    l3_pgentry_t *pl3e;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+
+    pl4e = l4start + l4_table_offset(vpt_start);
+    pl3e = l4e_to_l3e(*pl4e);
+    pl3e += l3_table_offset(vpt_start);
+    pl2e = l3e_to_l2e(*pl3e);
+    pl2e += l2_table_offset(vpt_start);
+    pl1e = l2e_to_l1e(*pl2e);
+    pl1e += l1_table_offset(vpt_start);
+    for ( count = 0; count < nr_pt_pages; count++ )
+    {
+        l1e_remove_flags(*pl1e, _PAGE_RW);
+        page = mfn_to_page(l1e_get_pfn(*pl1e));
+
+        /* Read-only mapping + PGC_allocated + page-table page. */
+        page->count_info         = PGC_allocated | 3;
+        page->u.inuse.type_info |= PGT_validated | 1;
+
+        /* Top-level p.t. is pinned. */
+        if ( (page->u.inuse.type_info & PGT_type_mask) ==
+             (!is_pv_32on64_domain(d) ?
+              PGT_l4_page_table : PGT_l3_page_table) )
+        {
+            page->count_info        += 1;
+            page->u.inuse.type_info += 1 | PGT_pinned;
+        }
+
+        /* Iterate. */
+        if ( !((unsigned long)++pl1e & (PAGE_SIZE - 1)) )
+        {
+            if ( !((unsigned long)++pl2e & (PAGE_SIZE - 1)) )
+            {
+                if ( !((unsigned long)++pl3e & (PAGE_SIZE - 1)) )
+                    pl3e = l4e_to_l3e(*++pl4e);
+                pl2e = l3e_to_l2e(*pl3e);
+            }
+            pl1e = l2e_to_l1e(*pl2e);
+        }
+    }
+}
+
+static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn,
+                                    unsigned long v_start, unsigned long v_end,
+                                    unsigned long vphysmap_start,
+                                    unsigned long vphysmap_end,
+                                    unsigned long nr_pages)
+{
+    struct page_info *page = NULL;
+    l4_pgentry_t *pl4e, *l4start = map_domain_page(pgtbl_pfn);
+    l3_pgentry_t *pl3e = NULL;
+    l2_pgentry_t *pl2e = NULL;
+    l1_pgentry_t *pl1e = NULL;
+
+    if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
+        panic("DOM0 P->M table overlaps initial mapping");
+
+    while ( vphysmap_start < vphysmap_end )
+    {
+        if ( d->tot_pages + ((round_pgup(vphysmap_end) - vphysmap_start)
+                             >> PAGE_SHIFT) + 3 > nr_pages )
+            panic("Dom0 allocation too small for initial P->M table");
+
+        if ( pl1e )
+        {
+            unmap_domain_page(pl1e);
+            pl1e = NULL;
+        }
+        if ( pl2e )
+        {
+            unmap_domain_page(pl2e);
+            pl2e = NULL;
+        }
+        if ( pl3e )
+        {
+            unmap_domain_page(pl3e);
+            pl3e = NULL;
+        }
+        pl4e = l4start + l4_table_offset(vphysmap_start);
+        if ( !l4e_get_intpte(*pl4e) )
+        {
+            page = alloc_domheap_page(d, 0);
+            if ( !page )
+                break;
+
+            /* No mapping, PGC_allocated + page-table page. */
+            page->count_info = PGC_allocated | 2;
+            page->u.inuse.type_info = PGT_l3_page_table | PGT_validated | 1;
+            pl3e = __map_domain_page(page);
+            clear_page(pl3e);
+            *pl4e = l4e_from_page(page, L4_PROT);
+        } else
+            pl3e = map_domain_page(l4e_get_pfn(*pl4e));
+
+        pl3e += l3_table_offset(vphysmap_start);
+        if ( !l3e_get_intpte(*pl3e) )
+        {
+            if ( cpu_has_page1gb &&
+                 !(vphysmap_start & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
+                 vphysmap_end >= vphysmap_start + (1UL << L3_PAGETABLE_SHIFT) &&
+                 (page = alloc_domheap_pages(d,
+                                             L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                                             0)) != NULL )
+            {
+                *pl3e = l3e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                vphysmap_start += 1UL << L3_PAGETABLE_SHIFT;
+                continue;
+            }
+            if ( (page = alloc_domheap_page(d, 0)) == NULL )
+                break;
+
+            /* No mapping, PGC_allocated + page-table page. */
+            page->count_info = PGC_allocated | 2;
+            page->u.inuse.type_info = PGT_l2_page_table | PGT_validated | 1;
+            pl2e = __map_domain_page(page);
+            clear_page(pl2e);
+            *pl3e = l3e_from_page(page, L3_PROT);
+        }
+        else
+           pl2e = map_domain_page(l3e_get_pfn(*pl3e));
+
+        pl2e += l2_table_offset(vphysmap_start);
+        if ( !l2e_get_intpte(*pl2e) )
+        {
+            if ( !(vphysmap_start & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
+                 vphysmap_end >= vphysmap_start + (1UL << L2_PAGETABLE_SHIFT) &&
+                 (page = alloc_domheap_pages(d,
+                                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+                                             0)) != NULL )
+            {
+                *pl2e = l2e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                if ( opt_allow_superpage )
+                    get_superpage(page_to_mfn(page), d);
+                vphysmap_start += 1UL << L2_PAGETABLE_SHIFT;
+                continue;
+            }
+            if ( (page = alloc_domheap_page(d, 0)) == NULL )
+                break;
+
+            /* No mapping, PGC_allocated + page-table page. */
+            page->count_info = PGC_allocated | 2;
+            page->u.inuse.type_info = PGT_l1_page_table | PGT_validated | 1;
+            pl1e = __map_domain_page(page);
+            clear_page(pl1e);
+            *pl2e = l2e_from_page(page, L2_PROT);
+        }
+        else
+            pl1e = map_domain_page(l2e_get_pfn(*pl2e));
+
+        pl1e += l1_table_offset(vphysmap_start);
+        BUG_ON(l1e_get_intpte(*pl1e));
+        page = alloc_domheap_page(d, 0);
+        if ( !page )
+            break;
+
+        *pl1e = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
+        vphysmap_start += PAGE_SIZE;
+        vphysmap_start &= PAGE_MASK;
+    }
+    if ( !page )
+        panic("Not enough RAM for DOM0 P->M table");
+
+    if ( pl1e )
+        unmap_domain_page(pl1e);
+    if ( pl2e )
+        unmap_domain_page(pl2e);
+    if ( pl3e )
+        unmap_domain_page(pl3e);
+
+    unmap_domain_page(l4start);
+}
+
 int __init construct_dom0(
     struct domain *d,
     const module_t *image, unsigned long image_headroom,
@@ -706,43 +887,8 @@ int __init construct_dom0(
     }
 
     /* Pages that are part of page tables must be read only. */
-    l4tab = l4start + l4_table_offset(vpt_start);
-    l3start = l3tab = l4e_to_l3e(*l4tab);
-    l3tab += l3_table_offset(vpt_start);
-    l2start = l2tab = l3e_to_l2e(*l3tab);
-    l2tab += l2_table_offset(vpt_start);
-    l1start = l1tab = l2e_to_l1e(*l2tab);
-    l1tab += l1_table_offset(vpt_start);
-    for ( count = 0; count < nr_pt_pages; count++ ) 
-    {
-        l1e_remove_flags(*l1tab, _PAGE_RW);
-        page = mfn_to_page(l1e_get_pfn(*l1tab));
-
-        /* Read-only mapping + PGC_allocated + page-table page. */
-        page->count_info         = PGC_allocated | 3;
-        page->u.inuse.type_info |= PGT_validated | 1;
-
-        /* Top-level p.t. is pinned. */
-        if ( (page->u.inuse.type_info & PGT_type_mask) ==
-             (!is_pv_32on64_domain(d) ?
-              PGT_l4_page_table : PGT_l3_page_table) )
-        {
-            page->count_info        += 1;
-            page->u.inuse.type_info += 1 | PGT_pinned;
-        }
-
-        /* Iterate. */
-        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
-        {
-            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
-            {
-                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
-                    l3start = l3tab = l4e_to_l3e(*++l4tab);
-                l2start = l2tab = l3e_to_l2e(*l3tab);
-            }
-            l1start = l1tab = l2e_to_l1e(*l2tab);
-        }
-    }
+    if ( is_pv_domain(d) )
+        mark_pv_pt_pages_rdonly(d, l4start, vpt_start, nr_pt_pages);
 
     /* Mask all upcalls... */
     for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
@@ -814,132 +960,15 @@ int __init construct_dom0(
              elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : "");
 
     count = d->tot_pages;
-    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
-    l3tab = NULL;
-    l2tab = NULL;
-    l1tab = NULL;
+
     /* Set up the phys->machine table if not part of the initial mapping. */
-    if ( parms.p2m_base != UNSET_ADDR )
+    if ( is_pv_domain(d) && parms.p2m_base != UNSET_ADDR )
     {
-        unsigned long va = vphysmap_start;
-
-        if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
-            panic("DOM0 P->M table overlaps initial mapping");
-
-        while ( va < vphysmap_end )
-        {
-            if ( d->tot_pages + ((round_pgup(vphysmap_end) - va)
-                                 >> PAGE_SHIFT) + 3 > nr_pages )
-                panic("Dom0 allocation too small for initial P->M table");
-
-            if ( l1tab )
-            {
-                unmap_domain_page(l1tab);
-                l1tab = NULL;
-            }
-            if ( l2tab )
-            {
-                unmap_domain_page(l2tab);
-                l2tab = NULL;
-            }
-            if ( l3tab )
-            {
-                unmap_domain_page(l3tab);
-                l3tab = NULL;
-            }
-            l4tab = l4start + l4_table_offset(va);
-            if ( !l4e_get_intpte(*l4tab) )
-            {
-                page = alloc_domheap_page(d, 0);
-                if ( !page )
-                    break;
-                /* No mapping, PGC_allocated + page-table page. */
-                page->count_info = PGC_allocated | 2;
-                page->u.inuse.type_info =
-                    PGT_l3_page_table | PGT_validated | 1;
-                l3tab = __map_domain_page(page);
-                clear_page(l3tab);
-                *l4tab = l4e_from_page(page, L4_PROT);
-            } else
-                l3tab = map_domain_page(l4e_get_pfn(*l4tab));
-            l3tab += l3_table_offset(va);
-            if ( !l3e_get_intpte(*l3tab) )
-            {
-                if ( cpu_has_page1gb &&
-                     !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
-                     vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) &&
-                     (page = alloc_domheap_pages(d,
-                                                 L3_PAGETABLE_SHIFT -
-                                                     PAGE_SHIFT,
-                                                 0)) != NULL )
-                {
-                    *l3tab = l3e_from_page(page,
-                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
-                    va += 1UL << L3_PAGETABLE_SHIFT;
-                    continue;
-                }
-                if ( (page = alloc_domheap_page(d, 0)) == NULL )
-                    break;
-                /* No mapping, PGC_allocated + page-table page. */
-                page->count_info = PGC_allocated | 2;
-                page->u.inuse.type_info =
-                    PGT_l2_page_table | PGT_validated | 1;
-                l2tab = __map_domain_page(page);
-                clear_page(l2tab);
-                *l3tab = l3e_from_page(page, L3_PROT);
-            }
-            else
-               l2tab = map_domain_page(l3e_get_pfn(*l3tab));
-            l2tab += l2_table_offset(va);
-            if ( !l2e_get_intpte(*l2tab) )
-            {
-                if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
-                     vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) &&
-                     (page = alloc_domheap_pages(d,
-                                                 L2_PAGETABLE_SHIFT -
-                                                     PAGE_SHIFT,
-                                                 0)) != NULL )
-                {
-                    *l2tab = l2e_from_page(page,
-                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
-                    if ( opt_allow_superpage )
-                        get_superpage(page_to_mfn(page), d);
-                    va += 1UL << L2_PAGETABLE_SHIFT;
-                    continue;
-                }
-                if ( (page = alloc_domheap_page(d, 0)) == NULL )
-                    break;
-                /* No mapping, PGC_allocated + page-table page. */
-                page->count_info = PGC_allocated | 2;
-                page->u.inuse.type_info =
-                    PGT_l1_page_table | PGT_validated | 1;
-                l1tab = __map_domain_page(page);
-                clear_page(l1tab);
-                *l2tab = l2e_from_page(page, L2_PROT);
-            }
-            else
-                l1tab = map_domain_page(l2e_get_pfn(*l2tab));
-            l1tab += l1_table_offset(va);
-            BUG_ON(l1e_get_intpte(*l1tab));
-            page = alloc_domheap_page(d, 0);
-            if ( !page )
-                break;
-            *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
-            va += PAGE_SIZE;
-            va &= PAGE_MASK;
-        }
-        if ( !page )
-            panic("Not enough RAM for DOM0 P->M table");
+        pfn = pagetable_get_pfn(v->arch.guest_table);
+        setup_pv_physmap(d, pfn, v_start, v_end, vphysmap_start, vphysmap_end,
+                         nr_pages);
     }
 
-    if ( l1tab )
-        unmap_domain_page(l1tab);
-    if ( l2tab )
-        unmap_domain_page(l2tab);
-    if ( l3tab )
-        unmap_domain_page(l3tab);
-    unmap_domain_page(l4start);
-
     /* Write the phys->machine and machine->phys table entries. */
     for ( pfn = 0; pfn < count; pfn++ )
     {
-- 
1.8.3.1


* [V9 PATCH 2/8] pvh dom0: construct_dom0 changes
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 1/8] pvh dom0: move some pv specific code to static functions Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 3/8] pvh dom0: Introduce p2m_map_foreign Mukesh Rathor
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

This patch changes construct_dom0() to boot in pvh mode:
  - Make sure the dom0 ELF supports pvh mode.
  - Call guest_physmap_add_page for pvh rather than setting the p2m entry
    directly.
  - Map all non-RAM regions 1:1 up to the end region in the e820 or 4GB,
    whichever is higher.
  - Allocate the p2m, copying the calculation from the toolstack (see the
    worked example below).
  - Allocate the shared info page from the virtual space so that the dom0
    page tables can be updated, then update the p2m for it with the actual
    mfn.
  - Since we build the page tables for pvh the same as for pv, in
    pvh_fixup_page_tables_for_hap we replace the mfns with pfns.
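
For concreteness, here is how the toolstack-derived allocation (copied from
libxl_get_required_shadow_memory(), as in the hunk below) works out for a
hypothetical dom0 with 4 vcpus and 4GB of RAM (nr_pages = 1048576):

    memkb     = nr_pages * (PAGE_SIZE / 1024)
              = 1048576 * 4                              = 4194304 kB
    memkb     = 4 * (256 * max_vcpus + 2 * (memkb / 1024))
              = 4 * (256 * 4 + 2 * 4096)                 = 36864 kB
    hap_pages = ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT)
              = 36 << 8                                  = 9216 pages (36MB)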

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/domain_build.c | 249 +++++++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/hap/hap.c   |  12 +++
 xen/include/asm-x86/hap.h   |   1 +
 3 files changed, 245 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index f3ae9ad..43634f1 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -35,6 +35,7 @@
 #include <asm/setup.h>
 #include <asm/bzimage.h> /* for bzimage_parse */
 #include <asm/io_apic.h>
+#include <asm/hap.h>
 
 #include <public/version.h>
 
@@ -307,6 +308,158 @@ static void __init process_dom0_ioports_disable(void)
     }
 }
 
+static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
+                                       unsigned long mfn, unsigned long nr_mfns)
+{
+    unsigned long i;
+    int rc;
+
+    for ( i = 0; i < nr_mfns; i++ )
+        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i))) )
+            panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
+                  gfn, mfn, i, rc);
+}
+
+/*
+ * Set the 1:1 map for all non-RAM regions for dom 0. Thus, dom0 will have
+ * the entire io region mapped in the EPT/NPT.
+ *
+ * pvh fixme: The following doesn't map MMIO ranges when they sit above the
+ *            highest E820 covered address.
+ */
+static __init void pvh_map_all_iomem(struct domain *d)
+{
+    unsigned long start_pfn, end_pfn, end = 0, start = 0;
+    const struct e820entry *entry;
+    unsigned int i, nump;
+
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        end = entry->addr + entry->size;
+
+        if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ||
+             i == e820.nr_map - 1 )
+        {
+            start_pfn = PFN_DOWN(start);
+
+            /* Unused RAM areas are marked UNUSABLE, so skip them too */
+            if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE )
+                end_pfn = PFN_UP(entry->addr);
+            else
+                end_pfn = PFN_UP(end);
+
+            if ( start_pfn < end_pfn )
+            {
+                nump = end_pfn - start_pfn;
+                /* Add pages to the mapping */
+                pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+            }
+            start = end;
+        }
+    }
+
+    /*
+     * Some BIOSes may not report I/O space above RAM that is less than 4GB.
+     * So we map any non-RAM up to 4GB.
+     */
+    if ( end < GB(4) )
+    {
+        start_pfn = PFN_UP(end);
+        end_pfn = (GB(4)) >> PAGE_SHIFT;
+        nump = end_pfn - start_pfn;
+        pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+    }
+}
+
+static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
+                                   unsigned long mfn, unsigned long vphysmap_s)
+{
+    if ( is_pvh_domain(d) )
+    {
+        int rc = guest_physmap_add_page(d, pfn, mfn, 0);
+        BUG_ON(rc);
+        return;
+    }
+    if ( !is_pv_32on64_domain(d) )
+        ((unsigned long *)vphysmap_s)[pfn] = mfn;
+    else
+        ((unsigned int *)vphysmap_s)[pfn] = mfn;
+
+    set_gpfn_from_mfn(mfn, pfn);
+}
+
+/* Replace mfns with pfns in dom0 page tables */
+static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v,
+                                                 unsigned long v_start,
+                                                 unsigned long v_end)
+{
+    int i, j, k;
+    l4_pgentry_t *pl4e, *l4start;
+    l3_pgentry_t *pl3e;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+    unsigned long cr3_pfn;
+
+    ASSERT(paging_mode_enabled(v->domain));
+
+    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
+
+    /* Clear entries prior to guest L4 start */
+    pl4e = l4start + l4_table_offset(v_start);
+    memset(l4start, 0, (unsigned long)pl4e - (unsigned long)l4start);
+
+    for ( ; pl4e <= l4start + l4_table_offset(v_end - 1); pl4e++ )
+    {
+        pl3e = map_l3t_from_l4e(*pl4e);
+        for ( i = 0; i < PAGE_SIZE / sizeof(*pl3e); i++, pl3e++ )
+        {
+            if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+                continue;
+
+            pl2e = map_l2t_from_l3e(*pl3e);
+            for ( j = 0; j < PAGE_SIZE / sizeof(*pl2e); j++, pl2e++ )
+            {
+                if ( !(l2e_get_flags(*pl2e)  & _PAGE_PRESENT) )
+                    continue;
+
+                pl1e = map_l1t_from_l2e(*pl2e);
+                for ( k = 0; k < PAGE_SIZE / sizeof(*pl1e); k++, pl1e++ )
+                {
+                    if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
+                        continue;
+
+                    *pl1e = l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*pl1e)),
+                                         l1e_get_flags(*pl1e));
+                }
+                unmap_domain_page(pl1e);
+                *pl2e = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*pl2e)),
+                                     l2e_get_flags(*pl2e));
+            }
+            unmap_domain_page(pl2e);
+            *pl3e = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*pl3e)),
+                                 l3e_get_flags(*pl3e));
+        }
+        unmap_domain_page(pl3e);
+        *pl4e = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*pl4e)),
+                             l4e_get_flags(*pl4e));
+    }
+
+    /* Clear entries post guest L4. */
+    if ( (unsigned long)pl4e & (PAGE_SIZE - 1) )
+        memset(pl4e, 0, PAGE_SIZE - ((unsigned long)pl4e & (PAGE_SIZE - 1)));
+
+    unmap_domain_page(l4start);
+
+    cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3));
+    v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn);
+
+    /*
+     * Finally, we update the paging modes (hap_update_paging_modes). This will
+     * create monitor_table for us, update v->arch.cr3, and update vmcs.cr3.
+     */
+    paging_update_paging_modes(v);
+}
+
 static __init void mark_pv_pt_pages_rdonly(struct domain *d,
                                            l4_pgentry_t *l4start,
                                            unsigned long vpt_start,
@@ -516,6 +669,8 @@ int __init construct_dom0(
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
     l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+    paddr_t shared_info_paddr = 0;
+    u32 save_pvh_pg_mode = 0;
 
     /*
      * This fully describes the memory layout of the initial domain. All 
@@ -593,12 +748,21 @@ int __init construct_dom0(
         goto out;
     }
 
-    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
-         !test_bit(XENFEAT_dom0, parms.f_supported) )
+    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE )
     {
-        printk("Kernel does not support Dom0 operation\n");
-        rc = -EINVAL;
-        goto out;
+        if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
+        {
+            printk("Kernel does not support Dom0 operation\n");
+            rc = -EINVAL;
+            goto out;
+        }
+        if ( is_pvh_domain(d) &&
+             !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) )
+        {
+            printk("Kernel does not support PVH mode\n");
+            rc = -EINVAL;
+            goto out;
+        }
     }
 
     if ( compat32 )
@@ -663,6 +827,13 @@ int __init construct_dom0(
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
                         sizeof(struct dom0_vga_console_info));
+
+    if ( is_pvh_domain(d) )
+    {
+        shared_info_paddr = round_pgup(vstartinfo_end) - v_start;
+        vstartinfo_end   += PAGE_SIZE;
+    }
+
     vpt_start        = round_pgup(vstartinfo_end);
     for ( nr_pt_pages = 2; ; nr_pt_pages++ )
     {
@@ -903,6 +1074,13 @@ int __init construct_dom0(
         (void)alloc_vcpu(d, i, cpu);
     }
 
+    /*
+     * pvh: we temporarily disable d->arch.paging.mode so that we can build the
+     * cr3 needed to run on dom0's page tables.
+     */
+    save_pvh_pg_mode = d->arch.paging.mode;
+    d->arch.paging.mode = 0;
+
     /* Set up CR3 value for write_ptbase */
     if ( paging_mode_enabled(d) )
         paging_update_paging_modes(v);
@@ -969,6 +1147,22 @@ int __init construct_dom0(
                          nr_pages);
     }
 
+    if ( is_pvh_domain(d) )
+    {
+        unsigned long hap_pages, memkb = nr_pages * (PAGE_SIZE / 1024);
+
+        /* Copied from: libxl_get_required_shadow_memory() */
+        memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
+        hap_pages = ( (memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
+        hap_set_alloc_for_pvh_dom0(d, hap_pages);
+    }
+
+    /*
+     * We enable paging mode again so guest_physmap_add_page will do the
+     * right thing for us.
+     */
+    d->arch.paging.mode = save_pvh_pg_mode;
+
     /* Write the phys->machine and machine->phys table entries. */
     for ( pfn = 0; pfn < count; pfn++ )
     {
@@ -985,11 +1179,7 @@ int __init construct_dom0(
         if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) )
             mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
-        if ( !is_pv_32on64_domain(d) )
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-        else
-            ((unsigned int *)vphysmap_start)[pfn] = mfn;
-        set_gpfn_from_mfn(mfn, pfn);
+        dom0_update_physmap(d, pfn, mfn, vphysmap_start);
         if (!(pfn & 0xfffff))
             process_pending_softirqs();
     }
@@ -1005,8 +1195,8 @@ int __init construct_dom0(
             if ( !page->u.inuse.type_info &&
                  !get_page_and_type(page, d, PGT_writable_page) )
                 BUG();
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
             ++pfn;
             if (!(pfn & 0xfffff))
                 process_pending_softirqs();
@@ -1026,11 +1216,7 @@ int __init construct_dom0(
 #ifndef NDEBUG
 #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
 #endif
-            if ( !is_pv_32on64_domain(d) )
-                ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            else
-                ((unsigned int *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
 #undef pfn
             page++; pfn++;
             if (!(pfn & 0xfffff))
@@ -1054,6 +1240,15 @@ int __init construct_dom0(
         si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
     }
 
+    /*
+     * PVH: We need to update si->shared_info while we are on dom0 page tables,
+     * but need to defer the p2m update until after we have fixed up the
+     * page tables for PVH so that the m2p for the si pte entry returns the
+     * correct pfn.
+     */
+    if ( is_pvh_domain(d) )
+        si->shared_info = shared_info_paddr;
+
     if ( is_pv_32on64_domain(d) )
         xlat_start_info(si, XLAT_start_info_console_dom0);
 
@@ -1087,8 +1282,15 @@ int __init construct_dom0(
     regs->eflags = X86_EFLAGS_IF;
 
     if ( opt_dom0_shadow )
+    {
+        if ( is_pvh_domain(d) )
+        {
+            printk("Unsupported option dom0_shadow for PVH\n");
+            return -EINVAL;
+        }
         if ( paging_enable(d, PG_SH_enable) == 0 ) 
             paging_update_paging_modes(v);
+    }
 
     if ( supervisor_mode_kernel )
     {
@@ -1178,6 +1380,19 @@ int __init construct_dom0(
         printk(" Xen warning: dom0 kernel broken ELF: %s\n",
                elf_check_broken(&elf));
 
+    if ( is_pvh_domain(d) )
+    {
+        /* Finally, fix up the page tables, replacing mfns with pfns. */
+        pvh_fixup_page_tables_for_hap(v, v_start, v_end);
+
+        /* The PT now has the correct pfn for si; update the mfn in the p2m. */
+        mfn = virt_to_mfn(d->shared_info);
+        pfn = shared_info_paddr >> PAGE_SHIFT;
+        dom0_update_physmap(d, pfn, mfn, 0);
+
+        pvh_map_all_iomem(d);
+    }
+
     iommu_dom0_init(dom0);
     return 0;
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index b8c5422..68a0c25 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -589,6 +589,18 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
     }
 }
 
+void __init hap_set_alloc_for_pvh_dom0(struct domain *d,
+                                       unsigned long hap_pages)
+{
+    int rc;
+
+    paging_lock(d);
+    rc = hap_set_allocation(d, hap_pages, NULL);
+    paging_unlock(d);
+
+    BUG_ON(rc);
+}
+
 static const struct paging_mode hap_paging_real_mode;
 static const struct paging_mode hap_paging_protected_mode;
 static const struct paging_mode hap_paging_pae_mode;
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index e03f983..5161927 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -63,6 +63,7 @@ int   hap_track_dirty_vram(struct domain *d,
                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
+void hap_set_alloc_for_pvh_dom0(struct domain *d, unsigned long num_pages);
 
 #endif /* XEN_HAP_H */
 
-- 
1.8.3.1


* [V9 PATCH 3/8] pvh dom0: Introduce p2m_map_foreign
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 1/8] pvh dom0: move some pv specific code to static functions Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 2/8] pvh dom0: construct_dom0 changes Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16  0:12 ` [V9 PATCH 4/8] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

In this patch, a new type, p2m_map_foreign, is introduced for pages
that the toolstack on an auto-translated dom0 or a control domain maps
from foreign domains that it is creating or supporting at run time.
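
As a quick illustration (not part of the patch), the new type composes with
the existing mask-based predicates, and its page-table flags mirror those of
p2m_grant_map_rw:

    p2m_type_t t = p2m_map_foreign;

    ASSERT(p2m_is_foreign(t));   /* new predicate added below */
    ASSERT(!p2m_is_ram(t));      /* deliberately not part of P2M_RAM_TYPES */
    /* In both the EPT and p2m-pt paths the entry is writable but not
     * executable, exactly like p2m_grant_map_rw. */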

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Acked-by: Tim Deegan <tim@xen.org>
---
 xen/arch/x86/mm/p2m-ept.c |  1 +
 xen/arch/x86/mm/p2m-pt.c  |  1 +
 xen/arch/x86/mm/p2m.c     | 22 +++++++++++++++++-----
 xen/include/asm-x86/p2m.h |  2 ++
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index a219f8b..1728b57 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -75,6 +75,7 @@ static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_acces
             entry->w = 0;
             break;
         case p2m_grant_map_rw:
+        case p2m_map_foreign:
             entry->r = entry->w = 1;
             entry->x = 0;
             break;
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 85706ba..3b449b8 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -89,6 +89,7 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
     case p2m_ram_rw:
         return flags | P2M_BASE_FLAGS | _PAGE_RW;
     case p2m_grant_map_rw:
+    case p2m_map_foreign:
         return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
     case p2m_mmio_direct:
         if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index f3476da..9edf8c7 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -496,7 +496,7 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
         for ( i = 0; i < (1UL << page_order); i++ )
         {
             mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, 0, NULL);
-            if ( !p2m_is_grant(t) && !p2m_is_shared(t) )
+            if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
                 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
             ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
         }
@@ -744,9 +744,9 @@ void p2m_change_type_range(struct domain *d,
     p2m_unlock(p2m);
 }
 
-
 /* Returns: 0 for success, -errno for failure */
-int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+                               p2m_type_t gfn_p2mt)
 {
     int rc = 0;
     p2m_access_t a;
@@ -771,8 +771,8 @@ int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
-    P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
-    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_mmio_direct,
+    P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
+    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
                        p2m->default_access);
     gfn_unlock(p2m, gfn, 0);
     if ( rc )
@@ -782,6 +782,18 @@ int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
     return rc;
 }
 
+/* Set foreign mfn in the given guest's p2m table. */
+static int __attribute__((unused))
+set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+    return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign);
+}
+
+int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+    return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+}
+
 /* Returns: 0 for success, -errno for failure */
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
 {
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 0fdfbda..8aab0ad 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -70,6 +70,7 @@ typedef enum {
     p2m_ram_paging_in = 11,       /* Memory that is being paged in */
     p2m_ram_shared = 12,          /* Shared or sharable memory */
     p2m_ram_broken = 13,          /* Broken page, access cause domain crash */
+    p2m_map_foreign  = 14,        /* ram pages from foreign domain */
 } p2m_type_t;
 
 /*
@@ -180,6 +181,7 @@ typedef unsigned int p2m_query_t;
 #define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES)
 #define p2m_is_shared(_t)   (p2m_to_mask(_t) & P2M_SHARED_TYPES)
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
+#define p2m_is_foreign(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
 
 /* Per-p2m-table state */
 struct p2m_domain {
-- 
1.8.3.1


* [V9 PATCH 4/8] pvh dom0: Add checks and restrictions for p2m_is_foreign
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
                   ` (2 preceding siblings ...)
  2014-04-16  0:12 ` [V9 PATCH 3/8] pvh dom0: Introduce p2m_map_foreign Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16 15:28   ` Jan Beulich
  2014-04-16  0:12 ` [V9 PATCH 5/8] pvh dom0: make xsm_map_gmfn_foreign available for x86 Mukesh Rathor
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

In this patch, we add some checks and restrictions in the relevant
p2m paths for p2m_is_foreign.
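
Informally, the rule the new BUG_ON()s encode for type changes (ot = old
type, nt = new type) is:

    BUG_ON(p2m_is_foreign(nt) || (p2m_is_foreign(ot) && !p2m_is_invalid(nt)));

that is, a type change may never produce a foreign entry, and an existing
foreign entry may only ever be changed to invalid.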

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/mm/p2m-ept.c |  1 +
 xen/arch/x86/mm/p2m.c     | 12 +++++++++---
 xen/include/asm-x86/p2m.h |  1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 1728b57..e6a0ef4 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -657,6 +657,7 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
     BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct));
+    BUG_ON(p2m_is_foreign(nt) || (p2m_is_foreign(ot) && !p2m_is_invalid(nt)));
 
     ept_change_entry_type_page(_mfn(ept_get_asr(ept)),
                                ept_get_wl(ept), ot, nt);
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 9edf8c7..996408a 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -547,6 +547,10 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
         return 0;
     }
 
+    /* foreign pages are added thru p2m_add_foreign */
+    if ( p2m_is_foreign(t) )
+        return -EINVAL;
+
     p2m_lock(p2m);
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
@@ -581,9 +585,9 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
             omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
             ASSERT(!p2m_is_shared(ot));
         }
-        if ( p2m_is_grant(ot) )
+        if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
         {
-            /* Really shouldn't be unmapping grant maps this way */
+            /* Really shouldn't be unmapping grant/foreign maps this way */
             domain_crash(d);
             p2m_unlock(p2m);
             
@@ -685,6 +689,7 @@ p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
+    BUG_ON(p2m_is_foreign(nt) || (p2m_is_foreign(ot) && !p2m_is_invalid(nt)));
 
     gfn_lock(p2m, gfn, 0);
 
@@ -709,6 +714,7 @@ void p2m_change_type_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
+    BUG_ON(p2m_is_foreign(nt) || (p2m_is_foreign(ot) && !p2m_is_invalid(nt)));
 
     p2m_lock(p2m);
     p2m->defer_nested_flush = 1;
@@ -759,7 +765,7 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 
     gfn_lock(p2m, gfn, 0);
     omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL);
-    if ( p2m_is_grant(ot) )
+    if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
     {
         p2m_unlock(p2m);
         domain_crash(d);
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 8aab0ad..7aa71cc 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -164,6 +164,7 @@ typedef unsigned int p2m_query_t;
 #define P2M_BROKEN_TYPES (p2m_to_mask(p2m_ram_broken))
 
 /* Useful predicates */
+#define p2m_is_invalid(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_invalid))
 #define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
 #define p2m_is_hole(_t) (p2m_to_mask(_t) & P2M_HOLE_TYPES)
 #define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
-- 
1.8.3.1


* [V9 PATCH 5/8] pvh dom0: make xsm_map_gmfn_foreign available for x86
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
                   ` (3 preceding siblings ...)
  2014-04-16  0:12 ` [V9 PATCH 4/8] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16 14:29   ` Daniel De Graaf
  2014-04-16  0:12 ` [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages Mukesh Rathor
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, julien.grall, tim, keir.xen, JBeulich, dgdegra

In this patch, we make xsm_map_gmfn_foreign available for x86 as well. It
is used by p2m_add_foreign() in the next patch, "pvh dom0: Add and remove
foreign pages".
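
For reference, the call site added by the next patch is simply:

    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);

where tdom is the pvh domain receiving the mapping and fdom is the domain
owning the page.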

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/include/xsm/dummy.h | 14 ++++++--------
 xen/include/xsm/xsm.h   | 16 ++++++----------
 xen/xsm/dummy.c         |  4 +---
 xen/xsm/flask/hooks.c   | 18 ++++++------------
 4 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index e722155..c3be99a 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -477,6 +477,12 @@ static XSM_INLINE int xsm_remove_from_physmap(XSM_DEFAULT_ARG struct domain *d1,
     return xsm_default_action(action, d1, d2);
 }
 
+static XSM_INLINE int xsm_map_gmfn_foreign(XSM_DEFAULT_ARG struct domain *d, struct domain *t)
+{
+    XSM_ASSERT_ACTION(XSM_TARGET);
+    return xsm_default_action(action, d, t);
+}
+
 static XSM_INLINE int xsm_hvm_param(XSM_DEFAULT_ARG struct domain *d, unsigned long op)
 {
     XSM_ASSERT_ACTION(XSM_TARGET);
@@ -624,11 +630,3 @@ static XSM_INLINE int xsm_ioport_mapping(XSM_DEFAULT_ARG struct domain *d, uint3
 }
 
 #endif /* CONFIG_X86 */
-
-#ifdef CONFIG_ARM
-static XSM_INLINE int xsm_map_gmfn_foreign(XSM_DEFAULT_ARG struct domain *d, struct domain *t)
-{
-    XSM_ASSERT_ACTION(XSM_TARGET);
-    return xsm_default_action(action, d, t);
-}
-#endif
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 2cd3a3b..330d5d2 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -91,6 +91,7 @@ struct xsm_operations {
     int (*memory_pin_page) (struct domain *d1, struct domain *d2, struct page_info *page);
     int (*add_to_physmap) (struct domain *d1, struct domain *d2);
     int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
+    int (*map_gmfn_foreign) (struct domain *d, struct domain *t);
     int (*claim_pages) (struct domain *d);
 
     int (*console_io) (struct domain *d, int cmd);
@@ -166,9 +167,6 @@ struct xsm_operations {
     int (*ioport_permission) (struct domain *d, uint32_t s, uint32_t e, uint8_t allow);
     int (*ioport_mapping) (struct domain *d, uint32_t s, uint32_t e, uint8_t allow);
 #endif
-#ifdef CONFIG_ARM
-    int (*map_gmfn_foreign) (struct domain *d, struct domain *t);
-#endif
 };
 
 #ifdef XSM_ENABLE
@@ -354,6 +352,11 @@ static inline int xsm_remove_from_physmap(xsm_default_t def, struct domain *d1,
     return xsm_ops->remove_from_physmap(d1, d2);
 }
 
+static inline int xsm_map_gmfn_foreign (xsm_default_t def, struct domain *d, struct domain *t)
+{
+    return xsm_ops->map_gmfn_foreign(d, t);
+}
+
 static inline int xsm_claim_pages(xsm_default_t def, struct domain *d)
 {
     return xsm_ops->claim_pages(d);
@@ -634,13 +637,6 @@ static inline int xsm_ioport_mapping (xsm_default_t def, struct domain *d, uint3
 }
 #endif /* CONFIG_X86 */
 
-#ifdef CONFIG_ARM
-static inline int xsm_map_gmfn_foreign (xsm_default_t def, struct domain *d, struct domain *t)
-{
-    return xsm_ops->map_gmfn_foreign(d, t);
-}
-#endif /* CONFIG_ARM */
-
 #endif /* XSM_NO_WRAPPERS */
 
 #ifdef CONFIG_MULTIBOOT
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index b79e10f..792a7fa 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -112,6 +112,7 @@ void xsm_fixup_ops (struct xsm_operations *ops)
 
     set_to_dummy_if_null(ops, add_to_physmap);
     set_to_dummy_if_null(ops, remove_from_physmap);
+    set_to_dummy_if_null(ops, map_gmfn_foreign);
 
 #ifdef CONFIG_X86
     set_to_dummy_if_null(ops, do_mca);
@@ -136,7 +137,4 @@ void xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, ioport_permission);
     set_to_dummy_if_null(ops, ioport_mapping);
 #endif
-#ifdef CONFIG_ARM
-    set_to_dummy_if_null(ops, map_gmfn_foreign);
-#endif
 }
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 4ce31c9..9a6b199 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1078,6 +1078,11 @@ static int flask_remove_from_physmap(struct domain *d1, struct domain *d2)
     return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
 }
 
+static int flask_map_gmfn_foreign(struct domain *d, struct domain *t)
+{
+    return domain_has_perm(d, t, SECCLASS_MMU, MMU__MAP_READ | MMU__MAP_WRITE);
+}
+
 static int flask_hvm_param(struct domain *d, unsigned long op)
 {
     u32 perm;
@@ -1460,13 +1465,6 @@ static int flask_unbind_pt_irq (struct domain *d, struct xen_domctl_bind_pt_irq
 }
 #endif /* CONFIG_X86 */
 
-#ifdef CONFIG_ARM
-static int flask_map_gmfn_foreign(struct domain *d, struct domain *t)
-{
-    return domain_has_perm(d, t, SECCLASS_MMU, MMU__MAP_READ | MMU__MAP_WRITE);
-}
-#endif
-
 long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
 int compat_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
 
@@ -1548,7 +1546,7 @@ static struct xsm_operations flask_ops = {
 
     .add_to_physmap = flask_add_to_physmap,
     .remove_from_physmap = flask_remove_from_physmap,
-
+    .map_gmfn_foreign = flask_map_gmfn_foreign,
 
 #if defined(HAS_PASSTHROUGH) && defined(HAS_PCI)
     .get_device_group = flask_get_device_group,
@@ -1580,10 +1578,6 @@ static struct xsm_operations flask_ops = {
     .ioport_permission = flask_ioport_permission,
     .ioport_mapping = flask_ioport_mapping,
 #endif
-
-#ifdef CONFIG_ARM
-    .map_gmfn_foreign = flask_map_gmfn_foreign,
-#endif
 };
 
 static __init int flask_init(void)
-- 
1.8.3.1


* [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
                   ` (4 preceding siblings ...)
  2014-04-16  0:12 ` [V9 PATCH 5/8] pvh dom0: make xsm_map_gmfn_foreign available for x86 Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16 16:00   ` Jan Beulich
  2014-04-16  0:12 ` [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range Mukesh Rathor
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: JBeulich, George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima

In this patch, a new function, p2m_add_foreign(), is added
to map pages from a foreign guest into dom0 for domU creation.
Such pages are typed p2m_map_foreign.  Note, it is the nature
of such pages that a refcnt is held during their stay in the p2m.
The refcnt is added and released in the low-level EPT function
atomic_write_ept_entry for convenience (see the sketch below).
Also, p2m_teardown is changed to add a call to allow for the
cleanup of foreign pages during its destruction.
Finally, get_pg_owner is changed to allow foreign mappings for
pvh, and is made public so it can be used by p2m_add_foreign().
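
In sketch form, the lifecycle of that refcnt (illustration only; the entry
point is the XENMEM_add_to_physmap{,_range} hypercall with space ==
XENMAPSPACE_gmfn_foreign, and intermediate levels are elided):

    xenmem_add_to_physmap_one()
      -> p2m_add_foreign(tdom, fgfn, gpfn, fdid)
           -> get_page_from_gfn(fdom, fgfn, ...)     /* temporary ref on the mfn */
           -> set_foreign_p2m_entry(tdom, gpfn, mfn)
                -> ... -> atomic_write_ept_entry()   /* takes the long-lived ref */
           -> put_page(page)                         /* drop the temporary ref */

The long-lived ref is dropped again in atomic_write_ept_entry() when the
entry is overwritten or cleared, e.g. via the XENMEM_remove_from_physmap
path or p2m_teardown().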

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/mm.c         |  11 +++--
 xen/arch/x86/mm/p2m-ept.c |  37 +++++++++++++---
 xen/arch/x86/mm/p2m-pt.c  |   7 +++
 xen/arch/x86/mm/p2m.c     | 110 +++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-x86/mm.h  |   2 +
 xen/include/asm-x86/p2m.h |   7 +++
 6 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index fdc5ed3..e8ab68a 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2794,7 +2794,7 @@ int new_guest_cr3(unsigned long mfn)
     return rc;
 }
 
-static struct domain *get_pg_owner(domid_t domid)
+struct domain *get_pg_owner(domid_t domid)
 {
     struct domain *pg_owner = NULL, *curr = current->domain;
 
@@ -2810,7 +2810,7 @@ static struct domain *get_pg_owner(domid_t domid)
         goto out;
     }
 
-    if ( unlikely(paging_mode_translate(curr)) )
+    if ( !is_pvh_domain(curr) && unlikely(paging_mode_translate(curr)) )
     {
         MEM_LOG("Cannot mix foreign mappings with translated domains");
         goto out;
@@ -2837,7 +2837,7 @@ static struct domain *get_pg_owner(domid_t domid)
     return pg_owner;
 }
 
-static void put_pg_owner(struct domain *pg_owner)
+void put_pg_owner(struct domain *pg_owner)
 {
     rcu_unlock_domain(pg_owner);
 }
@@ -4583,6 +4583,11 @@ int xenmem_add_to_physmap_one(
             page = mfn_to_page(mfn);
             break;
         }
+        case XENMAPSPACE_gmfn_foreign:
+        {
+            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
+            return rc;
+        }
         default:
             break;
     }
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index e6a0ef4..7dcfcb6 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -36,8 +36,6 @@
 
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
-#define atomic_write_ept_entry(__pepte, __epte)                     \
-    write_atomic(&(__pepte)->epte, (__epte).epte)
 
 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
@@ -46,6 +44,33 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
     return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
 }
 
+static inline void atomic_write_ept_entry(ept_entry_t *entryptr,
+                                          const ept_entry_t *new)
+{
+    unsigned long oldmfn = 0;
+
+    if ( p2m_is_foreign(new->sa_p2mt) )
+    {
+        int rc;
+        struct page_info *page;
+        struct domain *fdom;
+
+        ASSERT(mfn_valid(new->mfn));
+        page = mfn_to_page(new->mfn);
+        fdom = page_get_owner(page);
+        ASSERT(fdom);
+        rc = get_page(page, fdom);
+        ASSERT(rc);
+    }
+    if ( p2m_is_foreign(entryptr->sa_p2mt) )
+        oldmfn = entryptr->mfn;
+
+    write_atomic(&entryptr->epte, new->epte);
+
+    if ( oldmfn )
+        put_page(mfn_to_page(oldmfn));
+}
+
 static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
 {
     /* First apply type permissions */
@@ -377,7 +402,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
             ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
         }
 
-        atomic_write_ept_entry(ept_entry, new_entry);
+        atomic_write_ept_entry(ept_entry, &new_entry);
     }
     else
     {
@@ -398,7 +423,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 
         /* now install the newly split ept sub-tree */
         /* NB: please make sure domian is paused and no in-fly VT-d DMA. */
-        atomic_write_ept_entry(ept_entry, split_ept_entry);
+        atomic_write_ept_entry(ept_entry, &split_ept_entry);
 
         /* then move to the level we want to make real changes */
         for ( ; i > target; i-- )
@@ -425,7 +450,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
 
-        atomic_write_ept_entry(ept_entry, new_entry);
+        atomic_write_ept_entry(ept_entry, &new_entry);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
@@ -641,7 +666,7 @@ static void ept_change_entry_type_page(mfn_t ept_page_mfn, int ept_page_level,
 
             e.sa_p2mt = nt;
             ept_p2m_type_to_flags(&e, nt, e.access);
-            atomic_write_ept_entry(&epte[i], e);
+            atomic_write_ept_entry(&epte[i], &e);
         }
     }
 
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 3b449b8..915f60d 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -313,6 +313,13 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
     }
 
+    if ( p2m_is_foreign(p2mt) )
+    {
+        /* pvh fixme: foreign types are only supported on ept at present */
+        gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
+        return -EINVAL;
+    }
+
     rc = p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table);
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 996408a..67aa5f6 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -36,6 +36,7 @@
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <xsm/xsm.h>
 
 #include "mm-locks.h"
 
@@ -275,14 +276,18 @@ struct page_info *get_page_from_gfn_p2m(
         /* Fast path: look up and get out */
         p2m_read_lock(p2m);
         mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
-        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
-             && mfn_valid(mfn)
+        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
              && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
         {
             page = mfn_to_page(mfn);
-            if ( !get_page(page, d)
-                 /* Page could be shared */
-                 && !get_page(page, dom_cow) )
+            if ( p2m_is_foreign(*t) )
+            {
+                struct domain *fdom = page_get_owner_and_reference(page);
+                ASSERT(fdom && fdom != d);
+            }
+            else if ( !get_page(page, d)
+                      /* Page could be shared */
+                      && !get_page(page, dom_cow) )
                 page = NULL;
         }
         p2m_read_unlock(p2m);
@@ -451,6 +456,10 @@ void p2m_teardown(struct p2m_domain *p2m)
     d = p2m->domain;
 
     p2m_lock(p2m);
+    /* pvh: we must release refcnt on all foreign pages */
+    if ( is_pvh_domain(d) )
+        p2m_change_entry_type_global(d, p2m_map_foreign, p2m_invalid);
+
     ASSERT(atomic_read(&d->shr_pages) == 0);
     p2m->phys_table = pagetable_null();
 
@@ -789,8 +798,8 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 }
 
 /* Set foreign mfn in the given guest's p2m table. */
-static int __attribute__((unused))
-set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
+                                 mfn_t mfn)
 {
     return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign);
 }
@@ -1748,6 +1757,93 @@ out_p2m_audit:
 #endif /* P2M_AUDIT */
 
 /*
+ * Add frame from foreign domain to target domain's physmap. Similar to
+ * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
+ * and is not removed from foreign domain.
+ *
+ * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
+ *        - xentrace running on dom0 mapping xenheap pages. fdid would be
+ *          DOMID_XEN in such a case.
+ *        etc.
+ *
+ * Side Effect: the mfn for fgfn will be refcounted in lower level routines
+ *              so it is not lost while mapped here. The refcnt is released
+ *              via the XENMEM_remove_from_physmap path.
+ *
+ * Returns: 0 ==> success
+ */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t fdid)
+{
+    p2m_type_t p2mt, p2mt_prev;
+    unsigned long prev_mfn, mfn;
+    struct page_info *page;
+    int rc = -EINVAL;
+    struct domain *fdom = NULL;
+
+    if ( fdid == DOMID_SELF )
+        goto out;
+
+    rc = -ESRCH;
+    fdom = get_pg_owner(fdid);
+    if ( fdom == NULL )
+        goto out;
+
+    rc = -EINVAL;
+    if ( tdom == NULL || tdom == fdom || !is_pvh_domain(tdom) )
+        goto out;
+
+    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
+    if ( rc )
+        goto out;
+
+    /* following will take a refcnt on the mfn */
+    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
+    if ( !page || !p2m_is_valid(p2mt) )
+    {
+        if ( page )
+            put_page(page);
+        goto out;
+    }
+    mfn = mfn_x(page_to_mfn(page));
+
+    /* Remove previously mapped page if it is present. */
+    prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev));
+    if ( mfn_valid(_mfn(prev_mfn)) )
+    {
+        if ( is_xen_heap_mfn(prev_mfn) )
+            /* Xen heap frames are simply unhooked from this phys slot */
+            guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0);
+        else
+            /* Normal domain memory is freed, to avoid leaking memory. */
+            guest_remove_page(tdom, gpfn);
+    }
+    /*
+     * Create the new mapping. Can't use guest_physmap_add_page() because it
+     * would update the m2p table, resulting in mfn -> gpfn of dom0 instead
+     * of mfn -> fgfn of the domU.
+     */
+    rc = set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn));
+    if ( rc )
+        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
+                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
+                 gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id);
+
+    put_page(page);
+
+    /*
+     * This put_gfn matches the get_gfn above for prev_mfn. We must do it
+     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
+     * before us.
+     */
+    put_gfn(tdom, gpfn);
+
+out:
+    if ( fdom )
+        put_pg_owner(fdom);
+    return rc;
+}
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index c835f76..afa6c48 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -358,6 +358,8 @@ int  put_old_guest_table(struct vcpu *);
 int  get_page_from_l1e(
     l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);
+struct domain *get_pg_owner(domid_t domid);
+void put_pg_owner(struct domain *pg_owner);
 
 static inline void put_page_and_type(struct page_info *page)
 {
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 7aa71cc..47604da 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -184,6 +184,10 @@ typedef unsigned int p2m_query_t;
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
 #define p2m_is_foreign(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
 
+#define p2m_is_any_ram(_t)  (p2m_to_mask(_t) &                   \
+                             (P2M_RAM_TYPES | P2M_GRANT_TYPES |  \
+                              p2m_to_mask(p2m_map_foreign)))
+
 /* Per-p2m-table state */
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
@@ -513,6 +517,9 @@ p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
 
+/* Add foreign mapping to the guest's p2m table. */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreign_domid);
 
 /* 
  * Populate-on-demand
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread
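
To make the consumer side of p2m_add_foreign() concrete: the only route
in is the XENMAPSPACE_gmfn_foreign space of the add-to-physmap
hypercalls, and only the batch/range variant carries a foreign domid.
A rough guest-kernel-side sketch of mapping a single frame follows;
struct and field names are recalled from the public headers of this
period and should be treated as approximate, not authoritative.

    /* Map frame fgfn of domain fdid at gpfn in the caller's own physmap. */
    static int map_one_foreign_frame(domid_t fdid, xen_ulong_t fgfn,
                                     xen_pfn_t gpfn)
    {
        int err = 0;
        struct xen_add_to_physmap_range xatpr = {
            .domid = DOMID_SELF,                /* target: this (pvh) domain */
            .space = XENMAPSPACE_gmfn_foreign,
            .size  = 1,
            .foreign_domid = fdid,              /* owner of the frame */
        };

        set_xen_guest_handle(xatpr.idxs, &fgfn);
        set_xen_guest_handle(xatpr.gpfns, &gpfn);
        set_xen_guest_handle(xatpr.errs, &err);

        /* Xen keeps a refcnt on the frame until the matching
         * XENMEM_remove_from_physmap, per the comment in the patch. */
        return HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatpr)
               ?: err;
    }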

* [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
                   ` (5 preceding siblings ...)
  2014-04-16  0:12 ` [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16 16:05   ` Jan Beulich
  2014-04-16  0:12 ` [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
  2014-04-16 14:57 ` [V9 PATCH 0/8] pvh dom0 patches Roger Pau Monné
  8 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: JBeulich, George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima

pvh doesn't use apic emulation; as a result, vioapic_init is not called
and the vioapic ptr in struct hvm_domain is not initialized. One path
that would access the ptr for pvh is:

   hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
       hvm_mmio_intercept -> vioapic_range

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/hvm/vioapic.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index d3c681b..0b343bd 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -238,10 +238,11 @@ static int vioapic_write(
 
 static int vioapic_range(struct vcpu *v, unsigned long addr)
 {
-    struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);
+    struct hvm_vioapic *vioapic = v->domain->arch.hvm_domain.vioapic;
+    struct hvm_hw_vioapic *hwapic = domain_vioapic(v->domain);
 
-    return ((addr >= vioapic->base_address &&
-             (addr < vioapic->base_address + VIOAPIC_MEM_LENGTH)));
+    return (vioapic && (addr >= hwapic->base_address &&
+                        (addr < hwapic->base_address + VIOAPIC_MEM_LENGTH)));
 }
 
 const struct hvm_mmio_handler vioapic_mmio_handler = {
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
                   ` (6 preceding siblings ...)
  2014-04-16  0:12 ` [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range Mukesh Rathor
@ 2014-04-16  0:12 ` Mukesh Rathor
  2014-04-16 12:57   ` Konrad Rzeszutek Wilk
                     ` (2 more replies)
  2014-04-16 14:57 ` [V9 PATCH 0/8] pvh dom0 patches Roger Pau Monné
  8 siblings, 3 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16  0:12 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

A pvh dom0 is booted by adding dom0pvh to grub xen command line.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 docs/misc/pvh-readme.txt |  2 ++
 xen/arch/x86/setup.c     | 12 +++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/docs/misc/pvh-readme.txt b/docs/misc/pvh-readme.txt
index 9fea137..c5b3de4 100644
--- a/docs/misc/pvh-readme.txt
+++ b/docs/misc/pvh-readme.txt
@@ -37,6 +37,8 @@ supported. Phase I patches are broken into three parts:
    - tools changes for creating a PVH guest
    - boot of 64bit dom0 in PVH mode.
 
+To boot 64bit dom0 in PVH mode, add dom0pvh to grub xen command line.
+
 Following fixme's exist in the code:
   - arch/x86/time.c: support more tsc modes.
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index e9c2c51..bd35f65 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -61,6 +61,10 @@ integer_param("maxcpus", max_cpus);
 static bool_t __initdata disable_smep;
 invbool_param("smep", disable_smep);
 
+/* Boot dom0 in pvh mode */
+static bool_t __initdata opt_dom0pvh;
+boolean_param("dom0pvh", opt_dom0pvh);
+
 /* **** Linux config option: propagated to domain0. */
 /* "acpi=off":    Sisables both ACPI table parsing and interpreter. */
 /* "acpi=force":  Override the disable blacklist.                   */
@@ -541,7 +545,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
     char *cmdline, *kextra, *loader;
-    unsigned int initrdidx;
+    unsigned int initrdidx, domcr_flags = 0;
     multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
@@ -1337,8 +1341,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( !tboot_protect_mem_regions() )
         panic("Could not protect TXT memory regions");
 
-    /* Create initial domain 0. */
-    dom0 = domain_create(0, DOMCRF_s3_integrity, 0);
+     /* Create initial domain 0. */
+     domcr_flags = (opt_dom0pvh ? DOMCRF_pvh | DOMCRF_hap : 0);
+     domcr_flags |= DOMCRF_s3_integrity;
+     dom0 = domain_create(0, domcr_flags, 0);
     if ( IS_ERR(dom0) || (alloc_dom0_vcpu0() == NULL) )
         panic("Error creating domain 0");
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 58+ messages in thread
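
As a concrete (hypothetical) example of the boot-line change -- only the
dom0pvh option comes from the patch; kernel paths and the remaining
options are purely illustrative -- a grub2 stanza for such a boot might
read:

    menuentry 'Xen, PVH dom0' {
        multiboot /boot/xen.gz dom0pvh console=com1 com1=115200
        module /boot/vmlinuz root=/dev/sda1 ro
        module /boot/initrd.img
    }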

* Re: [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c
  2014-04-16  0:12 ` [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
@ 2014-04-16 12:57   ` Konrad Rzeszutek Wilk
  2014-04-16 13:01   ` Andrew Cooper
  2014-04-16 16:09   ` Jan Beulich
  2 siblings, 0 replies; 58+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-04-16 12:57 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: George.Dunlap, xen-devel, keir.xen, tim, JBeulich

On Tue, Apr 15, 2014 at 05:12:52PM -0700, Mukesh Rathor wrote:
> A pvh dom0 is booted by adding dom0pvh to grub xen command line.
> 
> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
> ---
>  docs/misc/pvh-readme.txt |  2 ++
>  xen/arch/x86/setup.c     | 12 +++++++++---


You are missing changes to docs/misc/xen-command-line.markdown
Otherwise, Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>  2 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/docs/misc/pvh-readme.txt b/docs/misc/pvh-readme.txt
> index 9fea137..c5b3de4 100644
> --- a/docs/misc/pvh-readme.txt
> +++ b/docs/misc/pvh-readme.txt
> @@ -37,6 +37,8 @@ supported. Phase I patches are broken into three parts:
>     - tools changes for creating a PVH guest
>     - boot of 64bit dom0 in PVH mode.
>  
> +To boot 64bit dom0 in PVH mode, add dom0pvh to grub xen command line.
> +
>  Following fixme's exist in the code:
>    - arch/x86/time.c: support more tsc modes.
>  
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index e9c2c51..bd35f65 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -61,6 +61,10 @@ integer_param("maxcpus", max_cpus);
>  static bool_t __initdata disable_smep;
>  invbool_param("smep", disable_smep);
>  
> +/* Boot dom0 in pvh mode */
> +static bool_t __initdata opt_dom0pvh;
> +boolean_param("dom0pvh", opt_dom0pvh);
> +
>  /* **** Linux config option: propagated to domain0. */
>  /* "acpi=off":    Sisables both ACPI table parsing and interpreter. */
>  /* "acpi=force":  Override the disable blacklist.                   */
> @@ -541,7 +545,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>  {
>      char *memmap_type = NULL;
>      char *cmdline, *kextra, *loader;
> -    unsigned int initrdidx;
> +    unsigned int initrdidx, domcr_flags = 0;
>      multiboot_info_t *mbi = __va(mbi_p);
>      module_t *mod = (module_t *)__va(mbi->mods_addr);
>      unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
> @@ -1337,8 +1341,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>      if ( !tboot_protect_mem_regions() )
>          panic("Could not protect TXT memory regions");
>  
> -    /* Create initial domain 0. */
> -    dom0 = domain_create(0, DOMCRF_s3_integrity, 0);
> +     /* Create initial domain 0. */
> +     domcr_flags = (opt_dom0pvh ? DOMCRF_pvh | DOMCRF_hap : 0);
> +     domcr_flags |= DOMCRF_s3_integrity;
> +     dom0 = domain_create(0, domcr_flags, 0);
>      if ( IS_ERR(dom0) || (alloc_dom0_vcpu0() == NULL) )
>          panic("Error creating domain 0");
>  
> -- 
> 1.8.3.1
> 
> 

^ permalink raw reply	[flat|nested] 58+ messages in thread
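
An entry of the kind Konrad asks for, following the conventions of
docs/misc/xen-command-line.markdown, could read as below (a sketch, not
the committed wording):

    ### dom0pvh
    > `= <boolean>`

    > Default: `false`

    Flag that makes a 64bit dom0 boot in PVH mode.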

* Re: [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c
  2014-04-16  0:12 ` [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
  2014-04-16 12:57   ` Konrad Rzeszutek Wilk
@ 2014-04-16 13:01   ` Andrew Cooper
  2014-04-16 16:09   ` Jan Beulich
  2 siblings, 0 replies; 58+ messages in thread
From: Andrew Cooper @ 2014-04-16 13:01 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: George.Dunlap, xen-devel, keir.xen, tim, JBeulich

On 16/04/14 01:12, Mukesh Rathor wrote:
> A pvh dom0 is booted by adding dom0pvh to grub xen command line.
>
> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
> ---
>  docs/misc/pvh-readme.txt |  2 ++
>  xen/arch/x86/setup.c     | 12 +++++++++---
>  2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/docs/misc/pvh-readme.txt b/docs/misc/pvh-readme.txt
> index 9fea137..c5b3de4 100644
> --- a/docs/misc/pvh-readme.txt
> +++ b/docs/misc/pvh-readme.txt
> @@ -37,6 +37,8 @@ supported. Phase I patches are broken into three parts:
>     - tools changes for creating a PVH guest
>     - boot of 64bit dom0 in PVH mode.
>  
> +To boot 64bit dom0 in PVH mode, add dom0pvh to grub xen command line.
> +
>  Following fixme's exist in the code:
>    - arch/x86/time.c: support more tsc modes.
>  
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index e9c2c51..bd35f65 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -61,6 +61,10 @@ integer_param("maxcpus", max_cpus);
>  static bool_t __initdata disable_smep;
>  invbool_param("smep", disable_smep);
>  
> +/* Boot dom0 in pvh mode */
> +static bool_t __initdata opt_dom0pvh;
> +boolean_param("dom0pvh", opt_dom0pvh);
> +
>  /* **** Linux config option: propagated to domain0. */
>  /* "acpi=off":    Sisables both ACPI table parsing and interpreter. */
>  /* "acpi=force":  Override the disable blacklist.                   */
> @@ -541,7 +545,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>  {
>      char *memmap_type = NULL;
>      char *cmdline, *kextra, *loader;
> -    unsigned int initrdidx;
> +    unsigned int initrdidx, domcr_flags = 0;
>      multiboot_info_t *mbi = __va(mbi_p);
>      module_t *mod = (module_t *)__va(mbi->mods_addr);
>      unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
> @@ -1337,8 +1341,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>      if ( !tboot_protect_mem_regions() )
>          panic("Could not protect TXT memory regions");
>  
> -    /* Create initial domain 0. */
> -    dom0 = domain_create(0, DOMCRF_s3_integrity, 0);
> +     /* Create initial domain 0. */
> +     domcr_flags = (opt_dom0pvh ? DOMCRF_pvh | DOMCRF_hap : 0);
> +     domcr_flags |= DOMCRF_s3_integrity;
> +     dom0 = domain_create(0, domcr_flags, 0);

tabs vs spaces?

~Andrew

>      if ( IS_ERR(dom0) || (alloc_dom0_vcpu0() == NULL) )
>          panic("Error creating domain 0");
>  

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 5/8] pvh dom0: make xsm_map_gmfn_foreign available for x86
  2014-04-16  0:12 ` [V9 PATCH 5/8] pvh dom0: make xsm_map_gmfn_foreign available for x86 Mukesh Rathor
@ 2014-04-16 14:29   ` Daniel De Graaf
  0 siblings, 0 replies; 58+ messages in thread
From: Daniel De Graaf @ 2014-04-16 14:29 UTC (permalink / raw)
  To: Mukesh Rathor, xen-devel
  Cc: George.Dunlap, keir.xen, tim, JBeulich, julien.grall

On 04/15/2014 08:12 PM, Mukesh Rathor wrote:
> In this patch we make xsm_map_gmfn_foreign available for x86 also. This
> is used in the next patch "pvh dom0: Add and remove foreign pages" in
> function p2m_add_foreign.
>
> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>

Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>

> ---
>   xen/include/xsm/dummy.h | 14 ++++++--------
>   xen/include/xsm/xsm.h   | 16 ++++++----------
>   xen/xsm/dummy.c         |  4 +---
>   xen/xsm/flask/hooks.c   | 18 ++++++------------
>   4 files changed, 19 insertions(+), 33 deletions(-)
>
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index e722155..c3be99a 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -477,6 +477,12 @@ static XSM_INLINE int xsm_remove_from_physmap(XSM_DEFAULT_ARG struct domain *d1,
>       return xsm_default_action(action, d1, d2);
>   }
>
> +static XSM_INLINE int xsm_map_gmfn_foreign(XSM_DEFAULT_ARG struct domain *d, struct domain *t)
> +{
> +    XSM_ASSERT_ACTION(XSM_TARGET);
> +    return xsm_default_action(action, d, t);
> +}
> +
>   static XSM_INLINE int xsm_hvm_param(XSM_DEFAULT_ARG struct domain *d, unsigned long op)
>   {
>       XSM_ASSERT_ACTION(XSM_TARGET);
> @@ -624,11 +630,3 @@ static XSM_INLINE int xsm_ioport_mapping(XSM_DEFAULT_ARG struct domain *d, uint3
>   }
>
>   #endif /* CONFIG_X86 */
> -
> -#ifdef CONFIG_ARM
> -static XSM_INLINE int xsm_map_gmfn_foreign(XSM_DEFAULT_ARG struct domain *d, struct domain *t)
> -{
> -    XSM_ASSERT_ACTION(XSM_TARGET);
> -    return xsm_default_action(action, d, t);
> -}
> -#endif
> diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
> index 2cd3a3b..330d5d2 100644
> --- a/xen/include/xsm/xsm.h
> +++ b/xen/include/xsm/xsm.h
> @@ -91,6 +91,7 @@ struct xsm_operations {
>       int (*memory_pin_page) (struct domain *d1, struct domain *d2, struct page_info *page);
>       int (*add_to_physmap) (struct domain *d1, struct domain *d2);
>       int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
> +    int (*map_gmfn_foreign) (struct domain *d, struct domain *t);
>       int (*claim_pages) (struct domain *d);
>
>       int (*console_io) (struct domain *d, int cmd);
> @@ -166,9 +167,6 @@ struct xsm_operations {
>       int (*ioport_permission) (struct domain *d, uint32_t s, uint32_t e, uint8_t allow);
>       int (*ioport_mapping) (struct domain *d, uint32_t s, uint32_t e, uint8_t allow);
>   #endif
> -#ifdef CONFIG_ARM
> -    int (*map_gmfn_foreign) (struct domain *d, struct domain *t);
> -#endif
>   };
>
>   #ifdef XSM_ENABLE
> @@ -354,6 +352,11 @@ static inline int xsm_remove_from_physmap(xsm_default_t def, struct domain *d1,
>       return xsm_ops->remove_from_physmap(d1, d2);
>   }
>
> +static inline int xsm_map_gmfn_foreign (xsm_default_t def, struct domain *d, struct domain *t)
> +{
> +    return xsm_ops->map_gmfn_foreign(d, t);
> +}
> +
>   static inline int xsm_claim_pages(xsm_default_t def, struct domain *d)
>   {
>       return xsm_ops->claim_pages(d);
> @@ -634,13 +637,6 @@ static inline int xsm_ioport_mapping (xsm_default_t def, struct domain *d, uint3
>   }
>   #endif /* CONFIG_X86 */
>
> -#ifdef CONFIG_ARM
> -static inline int xsm_map_gmfn_foreign (xsm_default_t def, struct domain *d, struct domain *t)
> -{
> -    return xsm_ops->map_gmfn_foreign(d, t);
> -}
> -#endif /* CONFIG_ARM */
> -
>   #endif /* XSM_NO_WRAPPERS */
>
>   #ifdef CONFIG_MULTIBOOT
> diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
> index b79e10f..792a7fa 100644
> --- a/xen/xsm/dummy.c
> +++ b/xen/xsm/dummy.c
> @@ -112,6 +112,7 @@ void xsm_fixup_ops (struct xsm_operations *ops)
>
>       set_to_dummy_if_null(ops, add_to_physmap);
>       set_to_dummy_if_null(ops, remove_from_physmap);
> +    set_to_dummy_if_null(ops, map_gmfn_foreign);
>
>   #ifdef CONFIG_X86
>       set_to_dummy_if_null(ops, do_mca);
> @@ -136,7 +137,4 @@ void xsm_fixup_ops (struct xsm_operations *ops)
>       set_to_dummy_if_null(ops, ioport_permission);
>       set_to_dummy_if_null(ops, ioport_mapping);
>   #endif
> -#ifdef CONFIG_ARM
> -    set_to_dummy_if_null(ops, map_gmfn_foreign);
> -#endif
>   }
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index 4ce31c9..9a6b199 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -1078,6 +1078,11 @@ static int flask_remove_from_physmap(struct domain *d1, struct domain *d2)
>       return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
>   }
>
> +static int flask_map_gmfn_foreign(struct domain *d, struct domain *t)
> +{
> +    return domain_has_perm(d, t, SECCLASS_MMU, MMU__MAP_READ | MMU__MAP_WRITE);
> +}
> +
>   static int flask_hvm_param(struct domain *d, unsigned long op)
>   {
>       u32 perm;
> @@ -1460,13 +1465,6 @@ static int flask_unbind_pt_irq (struct domain *d, struct xen_domctl_bind_pt_irq
>   }
>   #endif /* CONFIG_X86 */
>
> -#ifdef CONFIG_ARM
> -static int flask_map_gmfn_foreign(struct domain *d, struct domain *t)
> -{
> -    return domain_has_perm(d, t, SECCLASS_MMU, MMU__MAP_READ | MMU__MAP_WRITE);
> -}
> -#endif
> -
>   long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
>   int compat_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
>
> @@ -1548,7 +1546,7 @@ static struct xsm_operations flask_ops = {
>
>       .add_to_physmap = flask_add_to_physmap,
>       .remove_from_physmap = flask_remove_from_physmap,
> -
> +    .map_gmfn_foreign = flask_map_gmfn_foreign,
>
>   #if defined(HAS_PASSTHROUGH) && defined(HAS_PCI)
>       .get_device_group = flask_get_device_group,
> @@ -1580,10 +1578,6 @@ static struct xsm_operations flask_ops = {
>       .ioport_permission = flask_ioport_permission,
>       .ioport_mapping = flask_ioport_mapping,
>   #endif
> -
> -#ifdef CONFIG_ARM
> -    .map_gmfn_foreign = flask_map_gmfn_foreign,
> -#endif
>   };
>
>   static __init int flask_init(void)
>


-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 58+ messages in thread
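
For deployments running FLASK, the hook acked above boils down to an
MMU__MAP_READ | MMU__MAP_WRITE check between the two domains. An
illustrative policy rule granting a control domain the right to map a
guest's frames (the type names depend on the local policy) would be:

    allow dom0_t domU_t:mmu { map_read map_write };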

* Re: [V9 PATCH 0/8] pvh dom0 patches....
  2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
                   ` (7 preceding siblings ...)
  2014-04-16  0:12 ` [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
@ 2014-04-16 14:57 ` Roger Pau Monné
  2014-04-16 21:15   ` Mukesh Rathor
  8 siblings, 1 reply; 58+ messages in thread
From: Roger Pau Monné @ 2014-04-16 14:57 UTC (permalink / raw)
  To: Mukesh Rathor, xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

On 16/04/14 02:12, Mukesh Rathor wrote:
> Hi,
> 
> Please find V9 of pvh dom0 patches. Based on commit: ac0f56a with p2m
> patches (rename and do errno propagation) underneath.
> 
>  git tree: git://oss.oracle.com/git/mrathor/xen.git  branch: dom0pvh-v9

Thanks for the refresh on the series, I think you forgot to push the
series to the git repo (or at least I'm not seeing the dom0pvh-v9 branch).

Roger.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 4/8] pvh dom0: Add checks and restrictions for p2m_is_foreign
  2014-04-16  0:12 ` [V9 PATCH 4/8] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
@ 2014-04-16 15:28   ` Jan Beulich
  0 siblings, 0 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-16 15:28 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: George.Dunlap, xen-devel, keir.xen, tim

>>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -657,6 +657,7 @@ static void ept_change_entry_type_global(struct 
> p2m_domain *p2m,
>  
>      BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>      BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct));
> +    BUG_ON(p2m_is_foreign(nt) || (p2m_is_foreign(ot) && !p2m_is_invalid(nt)));

So what would a p2m_foreign -> p2m_invalid (permitted by this)
transition mean?

Also, as I'm about to severely restrict the set of types that can be
passed into this and the corresponding (generic) range functions, do
you have any intended use case for this (making it so you need to
permit that transition)?

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-16  0:12 ` [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages Mukesh Rathor
@ 2014-04-16 16:00   ` Jan Beulich
  2014-04-17  1:37     ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-16 16:00 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> In this patch, a new function, p2m_add_foreign(), is added
> to map pages from foreign guest into dom0 for domU creation.
> Such pages are typed p2m_map_foreign.  Note, it is the nature
> of such pages that a refcnt is held during their stay in the p2m.
> The refcnt is added and released in the low level ept function
> atomic_write_ept_entry for convenience.
> Also, p2m_teardown is changed to add a call to allow for the
> cleanup of foreign pages during its destruction.
> Finally, get_pg_owner is changed to allow pvh to map foreign mappings,
> and is made public to be used by p2m_add_foreign().

Do you really need to do this at this low a layer? I'm afraid that's
going to be rather limiting when wanting to use the bits used for the
p2m type for different purposes in intermediate table entries. I.e. I
think this special casing should only be done for leaf entries, and
hence at least one layer further up, or introduce something named
e.g. atomic_write_epte_leaf().

> @@ -4583,6 +4583,11 @@ int xenmem_add_to_physmap_one(
>              page = mfn_to_page(mfn);
>              break;
>          }
> +        case XENMAPSPACE_gmfn_foreign:
> +        {
> +            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
> +            return rc;
> +        }

No pointless braces please.

> @@ -46,6 +44,33 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
>      return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
>  }
>  
> +static inline void atomic_write_ept_entry(ept_entry_t *entryptr,
> +                                          const ept_entry_t *new)

I don't see a reason not to pass new by value. Also I think you would
want to drop the "inline" considering the size of the function.

> +{
> +    unsigned long oldmfn = 0;
> +
> +    if ( p2m_is_foreign(new->sa_p2mt) )

And you clearly want to wrap this in unlikely().

> +    {
> +        int rc;
> +        struct page_info *page;
> +        struct domain *fdom;
> +
> +        ASSERT(mfn_valid(new->mfn));
> +        page = mfn_to_page(new->mfn);
> +        fdom = page_get_owner(page);
> +        ASSERT(fdom);
> +        rc = get_page(page, fdom);
> +        ASSERT(rc);

I'm afraid you can't rely on this to succeed: A PV guest could have
mapped the page so many times that you can't get another ref.

> +    }
> +    if ( p2m_is_foreign(entryptr->sa_p2mt) )
> +        oldmfn = entryptr->mfn;
> +
> +    write_atomic(&entryptr->epte, new->epte);
> +
> +    if ( oldmfn )
> +        put_page(mfn_to_page(oldmfn));

Is it perhaps worthwhile avoiding the get/put pair if neither the MFN
nor the type change?

> @@ -275,14 +276,18 @@ struct page_info *get_page_from_gfn_p2m(
>          /* Fast path: look up and get out */
>          p2m_read_lock(p2m);
>          mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
> -        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
> -             && mfn_valid(mfn)
> +        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
>               && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
>          {
>              page = mfn_to_page(mfn);
> -            if ( !get_page(page, d)
> -                 /* Page could be shared */
> -                 && !get_page(page, dom_cow) )
> +            if ( p2m_is_foreign(*t) )
> +            {
> +                struct domain *fdom = page_get_owner_and_reference(page);
> +                ASSERT(fdom && fdom != d);

Same here regarding the assumption of guaranteed success.

> @@ -451,6 +456,10 @@ void p2m_teardown(struct p2m_domain *p2m)
>      d = p2m->domain;
>  
>      p2m_lock(p2m);
> +    /* pvh: we must release refcnt on all foreign pages */
> +    if ( is_pvh_domain(d) )
> +        p2m_change_entry_type_global(d, p2m_map_foreign, p2m_invalid);

Ah, here it comes. But no, this is a no-go. The function currently is
running for unbounded time, which I'm about to change (pending
some more testing; seems to generally work). I can't, however, see
an immediate way to integrate this into the new model (where type
changes get propagated down the page table hierarchy upon access
rather than when the type change is requested), largely because I'm
afraid the number of foreign mappings that a domain may want to
establish needs to be pretty much unbounded (i.e. can't be tracked
in a range set, linked list, or some such).

Why can't this be done as pages get de-allocated (which is a
preemptable process already)?

> +int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
> +                    unsigned long gpfn, domid_t fdid)

Please call this foreigndom, just like other code does, allowing for
it to be found by grep-ing the tree.

> +{
> +    p2m_type_t p2mt, p2mt_prev;
> +    unsigned long prev_mfn, mfn;
> +    struct page_info *page;
> +    int rc = -EINVAL;
> +    struct domain *fdom = NULL;
> +
> +    if ( fdid == DOMID_SELF )
> +        goto out;
> +
> +    rc = -ESRCH;
> +    fdom = get_pg_owner(fdid);
> +    if ( fdom == NULL )
> +        goto out;
> +
> +    rc = -EINVAL;
> +    if ( tdom == NULL || tdom == fdom || !is_pvh_domain(tdom) )

Is tdom == NULL a reasonable thing to expect here. I.e. can't you
rather ASSERT(tdom)?

> +        goto out;
> +
> +    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
> +    if ( rc )
> +        goto out;
> +
> +    /* following will take a refcnt on the mfn */
> +    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> +    if ( !page || !p2m_is_valid(p2mt) )

Is this really p2m_is_valid() (i.e. including various types apart from
p2m_ram_rw)?

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread
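
Pulling the review points above together -- new passed by value, the
inline dropped, the foreign case wrapped in unlikely(), and the
get_page() failure propagated rather than asserted -- the helper would
take roughly the following shape. This is a sketch of where the review
is heading, not the committed code; callers would now have to handle a
nonzero return:

    static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new)
    {
        unsigned long oldmfn = 0;

        if ( unlikely(p2m_is_foreign(new.sa_p2mt)) )
        {
            struct page_info *page;
            struct domain *fdom;

            if ( !mfn_valid(new.mfn) )
                return -EINVAL;
            page = mfn_to_page(new.mfn);
            fdom = page_get_owner(page);
            /* A heavily mapped page may have no refs left; report it. */
            if ( fdom == NULL || !get_page(page, fdom) )
                return -EBUSY;
        }

        if ( p2m_is_foreign(entryptr->sa_p2mt) )
            oldmfn = entryptr->mfn;

        write_atomic(&entryptr->epte, new.epte);

        if ( oldmfn )
            put_page(mfn_to_page(oldmfn));

        return 0;
    }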

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-16  0:12 ` [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range Mukesh Rathor
@ 2014-04-16 16:05   ` Jan Beulich
  2014-04-17  1:44     ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-16 16:05 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> pvh doesn't use apic emulation, as a result vioapic_init is not called
> and vioapic ptr in struct hvm_domain is not initialized. One path that
> would access the ptr for pvh is :
> 
>    hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
>        hvm_mmio_intercept -> vioapic_range

Given this I'm not sure the guard belongs here. The majority of the
handle_mmio() logic should never be used for Dom0. Perhaps you
should simply have a pvh_mmio_handlers[] paralleling
hvm_mmio_handlers[], but (presumably) only having HPET and MSI-X
entries for now?

Jan
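
A minimal sketch of that split, reusing the existing hpet_mmio_handler
and msixtbl_mmio_handler (the table name and its selection logic are
assumptions, not existing code):

    static const struct hvm_mmio_handler *const pvh_mmio_handlers[] =
    {
        &hpet_mmio_handler,    /* HPET emulation */
        &msixtbl_mmio_handler  /* MSI-X table intercepts */
    };

hvm_mmio_intercept() would then pick this table for is_pvh_domain()
callers, making "no vioapic" structural rather than a per-handler NULL
check.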

> --- a/xen/arch/x86/hvm/vioapic.c
> +++ b/xen/arch/x86/hvm/vioapic.c
> @@ -238,10 +238,11 @@ static int vioapic_write(
>  
>  static int vioapic_range(struct vcpu *v, unsigned long addr)
>  {
> -    struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);
> +    struct hvm_vioapic *vioapic = v->domain->arch.hvm_domain.vioapic;
> +    struct hvm_hw_vioapic *hwapic = domain_vioapic(v->domain);
>  
> -    return ((addr >= vioapic->base_address &&
> -             (addr < vioapic->base_address + VIOAPIC_MEM_LENGTH)));
> +    return (vioapic && (addr >= hwapic->base_address &&
> +                        (addr < hwapic->base_address + VIOAPIC_MEM_LENGTH)));
>  }
>  
>  const struct hvm_mmio_handler vioapic_mmio_handler = {
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c
  2014-04-16  0:12 ` [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
  2014-04-16 12:57   ` Konrad Rzeszutek Wilk
  2014-04-16 13:01   ` Andrew Cooper
@ 2014-04-16 16:09   ` Jan Beulich
  2 siblings, 0 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-16 16:09 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: George.Dunlap, xen-devel, keir.xen, tim

>>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> @@ -541,7 +545,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>  {
>      char *memmap_type = NULL;
>      char *cmdline, *kextra, *loader;
> -    unsigned int initrdidx;
> +    unsigned int initrdidx, domcr_flags = 0;
>      multiboot_info_t *mbi = __va(mbi_p);
>      module_t *mod = (module_t *)__va(mbi->mods_addr);
>      unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
> @@ -1337,8 +1341,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>      if ( !tboot_protect_mem_regions() )
>          panic("Could not protect TXT memory regions");
>  
> -    /* Create initial domain 0. */
> -    dom0 = domain_create(0, DOMCRF_s3_integrity, 0);
> +     /* Create initial domain 0. */
> +     domcr_flags = (opt_dom0pvh ? DOMCRF_pvh | DOMCRF_hap : 0);
> +     domcr_flags |= DOMCRF_s3_integrity;

Please use DOMCRF_s3_integrity as the initializer above (or drop
the initializer and set the variable to that value unconditionally first),
and then simply have

     if ( opt_dom0pvh )
         domcr_flags |= DOMCRF_pvh | DOMCRF_hap;

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 0/8] pvh dom0 patches....
  2014-04-16 14:57 ` [V9 PATCH 0/8] pvh dom0 patches Roger Pau Monné
@ 2014-04-16 21:15   ` Mukesh Rathor
  0 siblings, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-16 21:15 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: George.Dunlap, xen-devel, keir.xen, tim, JBeulich

On Wed, 16 Apr 2014 16:57:02 +0200
Roger Pau Monné <roger.pau@citrix.com> wrote:

> On 16/04/14 02:12, Mukesh Rathor wrote:
> > Hi,
> > 
> > Please find V9 of pvh dom0 patches. Based on commit: ac0f56a with
> > p2m patches (rename and do errno propagation) underneath.
> > 
> >  git tree: git://oss.oracle.com/git/mrathor/xen.git  branch:
> > dom0pvh-v9
> 
> Thanks for the refresh on the series, I think you forgot to push the
> series to the git repo (or at least I'm not seeing the dom0pvh-v9
> branch).
> 
> Roger.

Ooops, forgot. Done! Also, I'll submit your TS fixup patch later; I
realize I didn't include it in this series.

thanks
mukesh
 


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-16 16:00   ` Jan Beulich
@ 2014-04-17  1:37     ` Mukesh Rathor
  2014-04-17  6:50       ` Jan Beulich
  2014-04-22  0:19       ` Mukesh Rathor
  0 siblings, 2 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-17  1:37 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Wed, 16 Apr 2014 17:00:35 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> > In this patch, a new function, p2m_add_foreign(), is added
> > to map pages from foreign guest into dom0 for domU creation.
> > Such pages are typed p2m_map_foreign.  Note, it is the nature
> > of such pages that a refcnt is held during their stay in the p2m.
> > The refcnt is added and released in the low level ept function
> > atomic_write_ept_entry for convenience.
> > Also, p2m_teardown is changed to add a call to allow for the
> > cleanup of foreign pages during its destruction.
> > Finally, get_pg_owner is changed to allow pvh to map foreign
> > mappings, and is made public to be used by p2m_add_foreign().
> 
> Do you really need to do this at this low a layer? I'm afraid that's
> going to be rather limiting when wanting to use the bits used for the
> p2m type for different purposes in intermediate table entries. I.e. I
> think this special casing should only be done for leaf entries, and
> hence at least one layer further up, or introduce something named
> e.g. atomic_write_epte_leaf().

Well, Tim and I went back and forth several times on this over the
last several months (you were cc'd :) ). 

Since a refcnt absolutely needs to be taken and released for such 
pages while they are mapped, the best way to ensure that was to do
it at the lowest level. This was Tim's suggestion, my earlier patches
did at higher levels. At present, there is a single purpose for foreign
types. But future uses, unlikely at this point, can make any relevant
enhancements.

> > @@ -4583,6 +4583,11 @@ int xenmem_add_to_physmap_one(
> >              page = mfn_to_page(mfn);
> >              break;
> >          }
> > +        case XENMAPSPACE_gmfn_foreign:
> > +        {
> > +            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
> > +            return rc;
> > +        }
> 
> No pointless braces please.

Often, case statements get so long that having braces makes it easier
to find the matching code, especially when indentation is weird for case
statements and, uglier still, switch statements within switch statements
are allowed.

But I'll remove it here anyway rather than argue, even though the prev
case has {}.

 
> > @@ -46,6 +44,33 @@ static inline bool_t is_epte_valid(ept_entry_t
> > *e) return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
> >  }
> >  
> > +static inline void atomic_write_ept_entry(ept_entry_t *entryptr,
> > +                                          const ept_entry_t *new)
> 
> I don't see a reason not to pass new by value. Also I think you would
> want to drop the "inline" considering the size of the function.

ok. I guess entries are unlikely to grow beyond u64 size.

> > +{
> > +    unsigned long oldmfn = 0;
> > +
> > +    if ( p2m_is_foreign(new->sa_p2mt) )
> 
> And you clearly want to wrap this in unlikely().

Ok.

> > +    {
> > +        int rc;
> > +        struct page_info *page;
> > +        struct domain *fdom;
> > +
> > +        ASSERT(mfn_valid(new->mfn));
> > +        page = mfn_to_page(new->mfn);
> > +        fdom = page_get_owner(page);
> > +        ASSERT(fdom);
> > +        rc = get_page(page, fdom);
> > +        ASSERT(rc);
> 
> I'm afraid you can't rely on this to succeed: A PV guest could have
> mapped the page so many times that you can't get another ref.

foreign pages are valid for PVH/HVM only; PV would never get here.
I can return rc instead of ASSERT. In the prev version review, you
had asked for an ASSERT here.


> > +    }
> > +    if ( p2m_is_foreign(entryptr->sa_p2mt) )
> > +        oldmfn = entryptr->mfn;
> > +
> > +    write_atomic(&entryptr->epte, new->epte);
> > +
> > +    if ( oldmfn )
> > +        put_page(mfn_to_page(oldmfn));
> 
> Is it perhaps worthwhile avoiding the get/put pair if neither the MFN
> nor the type change?

Such a change is so not allowed at present. 


> > @@ -275,14 +276,18 @@ struct page_info *get_page_from_gfn_p2m(
> >          /* Fast path: look up and get out */
> >          p2m_read_lock(p2m);
> >          mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
> > -        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
> > -             && mfn_valid(mfn)
> > +        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
> >               && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
> >          {
> >              page = mfn_to_page(mfn);
> > -            if ( !get_page(page, d)
> > -                 /* Page could be shared */
> > -                 && !get_page(page, dom_cow) )
> > +            if ( p2m_is_foreign(*t) )
> > +            {
> > +                struct domain *fdom =
> > page_get_owner_and_reference(page);
> > +                ASSERT(fdom && fdom != d);
> 
> Same here regarding the assumption of guaranteed success.

Sure, can change that to return NULL in case of failure.

> > @@ -451,6 +456,10 @@ void p2m_teardown(struct p2m_domain *p2m)
> >      d = p2m->domain;
> >  
> >      p2m_lock(p2m);
> > +    /* pvh: we must release refcnt on all foreign pages */
> > +    if ( is_pvh_domain(d) )
> > +        p2m_change_entry_type_global(d, p2m_map_foreign,
> > p2m_invalid);
> 
> Ah, here it comes. But no, this is a no-go. The function currently is
> running for unbounded time, which I'm about to change (pending
> some more testing; seems to generally work). I can't, however, see

I believe Tim has a patch to make this pre-emptible and OK'd this hook
here. This path for foreign pages is only taken when the p2m is destroyed
after a domain crash, which at present would only be dom0 (or controlling
domains in the future). Normally, these pages are unmapped by the guest
via the remove-from-physmap hcall.

Moreover, the number of foreign pages is relatively small.


> > +int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
> > +                    unsigned long gpfn, domid_t fdid)
> 
> Please call this foreigndom, just like other code does, allowing for
> it to be found by grep-ing the tree.
> 
> > +{
> > +    p2m_type_t p2mt, p2mt_prev;
> > +    unsigned long prev_mfn, mfn;
> > +    struct page_info *page;
> > +    int rc = -EINVAL;
> > +    struct domain *fdom = NULL;
> > +
> > +    if ( fdid == DOMID_SELF )
> > +        goto out;
> > +
> > +    rc = -ESRCH;
> > +    fdom = get_pg_owner(fdid);
> > +    if ( fdom == NULL )
> > +        goto out;
> > +
> > +    rc = -EINVAL;
> > +    if ( tdom == NULL || tdom == fdom || !is_pvh_domain(tdom) )
> 
> Is tdom == NULL a reasonable thing to expect here. I.e. can't you
> rather ASSERT(tdom)?

Could. Why is it a big deal to check for NULL rather than ASSERT?


> > +        goto out;
> > +
> > +    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
> > +    if ( rc )
> > +        goto out;
> > +
> > +    /* following will take a refcnt on the mfn */
> > +    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> > +    if ( !page || !p2m_is_valid(p2mt) )
> 
> Is this really p2m_is_valid() (i.e. including various types apart from
> p2m_ram_rw)?

Hmm... let me check again whether, for an HVM guest coming up on PVH
dom0, there are use cases for non-ram types. Otherwise, it could be
just p2m_is_ram.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-16 16:05   ` Jan Beulich
@ 2014-04-17  1:44     ` Mukesh Rathor
  2014-04-17  6:54       ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-17  1:44 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Wed, 16 Apr 2014 17:05:57 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> > pvh doesn't use apic emulation, as a result vioapic_init is not
> > called and vioapic ptr in struct hvm_domain is not initialized. One
> > path that would access the ptr for pvh is :
> > 
> >    hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
> >        hvm_mmio_intercept -> vioapic_range
> 
> Given this I'm not sure the guard belongs here. The majority of the
> handle_mmio() logic should never be used for Dom0. Perhaps you
> should simply have a pvh_mmio_handlers[] paralleling
> hvm_mmio_handlers[], but (presumably) only having HPET and MSI-X
> entries for now?

Well, there's already talk of adding vioapic support for PVH so it
could take advantage of the new features coming up. So, it'll probably
converge in the near future with hvm_mmio_handlers. I'm ok either way.

thanks,
Mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-17  1:37     ` Mukesh Rathor
@ 2014-04-17  6:50       ` Jan Beulich
  2014-04-17 12:36         ` Tim Deegan
  2014-04-22  0:19       ` Mukesh Rathor
  1 sibling, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-17  6:50 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> On Wed, 16 Apr 2014 17:00:35 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
>> > In this patch, a new function, p2m_add_foreign(), is added
>> > to map pages from foreign guest into dom0 for domU creation.
>> > Such pages are typed p2m_map_foreign.  Note, it is the nature
>> > of such pages that a refcnt is held during their stay in the p2m.
>> > The refcnt is added and released in the low level ept function
>> > atomic_write_ept_entry for convenience.
>> > Also, p2m_teardown is changed to add a call to allow for the
>> > cleanup of foreign pages during its destruction.
>> > Finally, get_pg_owner is changed to allow pvh to map foreign
>> > mappings, and is made public to be used by p2m_add_foreign().
>> 
>> Do you really need to do this at this low a layer? I'm afraid that's
>> going to be rather limiting when wanting to use the bits used for the
>> p2m type for different purposes in intermediate table entries. I.e. I
>> think this special casing should only be done for leaf entries, and
>> hence at least one layer further up, or introduce something named
>> e.g. atomic_write_epte_leaf().
> 
> Well, Tim and I went back and forth several times on this over the
> last several months (you were cc'd :) ). 

I know, but having worked a lot on the P2M code recently my
perspective may have changed.

> Since a refcnt absolutely needs to be taken and released for such 
> pages while they are mapped, the best way to ensure that was to do
> it at the lowest level. This was Tim's suggestion, my earlier patches
> did at higher levels. At present, there is a single purpose for foreign
> types. But future uses, unlikely at this point, can make any relevant
> enhancements.

For starters I suggest you go look at what extra uses of the macro/
function got added between the commit this series was based on
and current tip. And then, with future uses I wasn't referring to
future uses of foreign types, but future uses of fields in the EPT
entries (and namely in intermediate ones, where they are really
unused at present). I say this because one of the failed attempts in
dealing with the preemption/propagation needs was to make use of
these fields already. But yes, it's only a "would be nice", not a strict
requirement to avoid collisions with eventual future uses.

>> > @@ -4583,6 +4583,11 @@ int xenmem_add_to_physmap_one(
>> >              page = mfn_to_page(mfn);
>> >              break;
>> >          }
>> > +        case XENMAPSPACE_gmfn_foreign:
>> > +        {
>> > +            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
>> > +            return rc;
>> > +        }
>> 
>> No pointless braces please.
> 
> Often, case statements get so long that having braces makes it easier
> to find the matching code, especially when indentation is weird for case
> statements and, uglier still, switch statements within switch statements
> are allowed.
> 
> But I'll remove it here anyway rather than argue, even though the prev
> case has {}.

The fundamental thought here is: "Yes, there are bad examples, but
please let's not add further ones."

>> > +    {
>> > +        int rc;
>> > +        struct page_info *page;
>> > +        struct domain *fdom;
>> > +
>> > +        ASSERT(mfn_valid(new->mfn));
>> > +        page = mfn_to_page(new->mfn);
>> > +        fdom = page_get_owner(page);
>> > +        ASSERT(fdom);
>> > +        rc = get_page(page, fdom);
>> > +        ASSERT(rc);
>> 
>> I'm afraid you can't rely on this to succeed: A PV guest could have
>> mapped the page so many times that you can't get another ref.
> 
> foreign pages are valid for PVH/HVM only, PV would never get here.

Am I wrong in thinking that the mapping domain can only be PVH/HVM,
but the foreign domain can well be PV? Or how do you deal with PVH
Dom0 managing PV guests?

> I can return rc instead of ASSERT. In the prev version review, you
> had asked for an ASSERT here.

Iirc all I said is that you won't get away without looking at the error.
And that I left it open for you to determine whether an assertion
would be valid, saving you from handling the error (which involves
more than just returning an rc: the function isn't currently expected
to have a way to fail - one more reason to reconsider moving this up
a layer).

>> > @@ -451,6 +456,10 @@ void p2m_teardown(struct p2m_domain *p2m)
>> >      d = p2m->domain;
>> >  
>> >      p2m_lock(p2m);
>> > +    /* pvh: we must release refcnt on all foreign pages */
>> > +    if ( is_pvh_domain(d) )
>> > +        p2m_change_entry_type_global(d, p2m_map_foreign,
>> > p2m_invalid);
>> 
>> Ah, here it comes. But no, this is a no-go. The function currently is
>> running for unbounded time, which I'm about to change (pending
>> some more testing; seems to generally work). I can't, however, see
> 
> I believe Tim has a patch to make this pre-emptible and OK'd this hook
> here.

Has he? I'm sure he would have told me, so I could have avoided
working towards the (almost) same during the last several weeks.

> This path for foreign pages is only for destroy of p2m in case 
> of domain crash, which at present would only be dom0 (or controlling
> domains in future). Normally, these pages are un-mapped by guest via the 
> remove from physmap hcall. 
> 
> Moreover, the number of foreign pages is relatively small.

That's missing the point: No matter how small their count, you still
need to iterate through the entire page table hierarchy. Plus - what
makes you think that count is small? Qemu, iirc, keeps quite a few
pages mapped - are they not mapped using this mechanism? Plus
their count multiplies by the number of domains managed (arguably
that count should be greater than 1 only for non-Dom0 control
domains, and Dom0 won't make it here anyway - which raises the
question whether this change is of any practical use under the
topic of the patch series, i.e. enabling PVH Dom0).

>> > +{
>> > +    p2m_type_t p2mt, p2mt_prev;
>> > +    unsigned long prev_mfn, mfn;
>> > +    struct page_info *page;
>> > +    int rc = -EINVAL;
>> > +    struct domain *fdom = NULL;
>> > +
>> > +    if ( fdid == DOMID_SELF )
>> > +        goto out;
>> > +
>> > +    rc = -ESRCH;
>> > +    fdom = get_pg_owner(fdid);
>> > +    if ( fdom == NULL )
>> > +        goto out;
>> > +
>> > +    rc = -EINVAL;
>> > +    if ( tdom == NULL || tdom == fdom || !is_pvh_domain(tdom) )
>> 
>> Is tdom == NULL a reasonable thing to expect here. I.e. can't you
>> rather ASSERT(tdom)?
> 
> Could. Why is it a big deal to check for NULL rather than ASSERT?

It serves documentation purposes: If you return an error upon seeing
NULL, you expect someone could call you that way. If you assert, you
make clear that callers have to make sure they pass a valid pointer.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-17  1:44     ` Mukesh Rathor
@ 2014-04-17  6:54       ` Jan Beulich
  2014-04-22  0:59         ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-17  6:54 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 17.04.14 at 03:44, <mukesh.rathor@oracle.com> wrote:
> On Wed, 16 Apr 2014 17:05:57 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
>> > pvh doesn't use apic emulation, as a result vioapic_init is not
>> > called and vioapic ptr in struct hvm_domain is not initialized. One
>> > path that would access the ptr for pvh is :
>> > 
>> >    hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
>> >        hvm_mmio_intercept -> vioapic_range
>> 
>> Given this I'm not sure the guard belongs here. The majority of the
>> handle_mmio() logic should never be used for Dom0. Perhaps you
>> should simply have a pvh_mmio_handlers[] paralleling
>> hvm_mmio_handlers[], but (presumably) only having HPET and MSI-X
>> entries for now?
> 
> Well, there's already talk of adding vioapic support for PVH so it
> could take advantage of the new features coming up. So, it'll prob 
> converge in near future with hvm_mmio_handlers . I'm ok either way. 

From a conceptual pov I still think the separation of emulation paths
should happen earlier and/or be more explicit, not least because
iirc PVH guests are expected to not have a qemu associated.

That aside - why is this coming up only now? The emulation path
getting reached shouldn't really depend on Dom0 vs DomU?

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-17  6:50       ` Jan Beulich
@ 2014-04-17 12:36         ` Tim Deegan
  2014-04-17 13:58           ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Tim Deegan @ 2014-04-17 12:36 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George.Dunlap, eddie.dong, keir.xen, jun.nakajima, xen-devel

At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> > On Wed, 16 Apr 2014 17:00:35 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > 
> >> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> >> > In this patch, a new function, p2m_add_foreign(), is added
> >> > to map pages from foreign guest into dom0 for domU creation.
> >> > Such pages are typed p2m_map_foreign.  Note, it is the nature
> >> > of such pages that a refcnt is held during their stay in the p2m.
> >> > The refcnt is added and released in the low level ept function
> >> > atomic_write_ept_entry for convenience.
> >> > Also, p2m_teardown is changed to add a call to allow for the
> >> > cleanup of foreign pages during its destruction.
> >> > Finally, get_pg_owner is changed to allow pvh to map foreign
> >> > mappings, and is made public to be used by p2m_add_foreign().
> >> 
> >> Do you really need to do this at this low a layer? I'm afraid that's
> >> going to be rather limiting when wanting to use the bits used for the
> >> p2m type for different purposes in intermediate table entries. I.e. I
> >> think this special casing should only be done for leaf entries, and
> >> hence at least one layer further up, or introduce something named
> >> e.g. atomic_write_epte_leaf().
> > 
> > Well, Tim and I went back and forth several times on this over the
> > last several months (you were cc'd :) ). 
> 
> I know, but having worked a lot on the P2M code recently my
> perspective may have changed.

[I'm assuming the objection here is to having the refcounts updated
in atomic_write_ept_entry, which was the change I requested.]
My opinion is still very strongly that reference counting must be done
when the entries change.  Trying to get this kind of thing right in
the callers is an _enormous_ PITA, as I learned working on the shadow
pagetables.  It would get very messy (see, e.g. the myriad places
where p2m op callers individually check for paged/shared entries) and
it'd be nigh impossible to debug where in several hours of operation
something changed a p2m entry from foreign to something else without
dropping a page ref.

That said, it should be easy enough only to refcount on leaf entries,
right?  I can't see how that would be incompatible with the
intermediate-node changes that Jan is working on.
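
In code, the leaf-only variant both sides seem able to live with keeps
the raw write for intermediate entries and confines the refcounting to a
wrapper. A sketch, with the name taken from Jan's earlier suggestion and
get_foreign_page_ref() a hypothetical helper doing the mfn_valid() and
get_page() checks from the patch:

    /* Intermediate (non-leaf) entries keep the cheap raw write, leaving
     * their type bits free for future reuse: */
    #define atomic_write_ept_entry(entryptr, new) \
        write_atomic(&(entryptr)->epte, (new).epte)

    /* Only leaf writes pay for the foreign-page bookkeeping: */
    static int atomic_write_epte_leaf(ept_entry_t *entryptr, ept_entry_t new)
    {
        int rc = p2m_is_foreign(new.sa_p2mt)
                 ? get_foreign_page_ref(new.mfn) : 0;

        if ( rc == 0 )
        {
            unsigned long oldmfn = p2m_is_foreign(entryptr->sa_p2mt)
                                   ? entryptr->mfn : 0;

            atomic_write_ept_entry(entryptr, new);
            if ( oldmfn )
                put_page(mfn_to_page(oldmfn));
        }
        return rc;
    }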

> >> > @@ -451,6 +456,10 @@ void p2m_teardown(struct p2m_domain *p2m)
> >> >      d = p2m->domain;
> >> >  
> >> >      p2m_lock(p2m);
> >> > +    /* pvh: we must release refcnt on all foreign pages */
> >> > +    if ( is_pvh_domain(d) )
> >> > +        p2m_change_entry_type_global(d, p2m_map_foreign,
> >> > p2m_invalid);
> >> 
> >> Ah, here it comes. But no, this is a no-go. The function currently is
> >> running for unbounded time, which I'm about to change (pending
> >> some more testing; seems to generally work). I can't, however, see
> > 
> > I believe Tim has a patch to make this pre-emptible and OK'd this hook
> > here.
> 
> Has he? I'm sure he would have told me, so I could have avoided
> working towards the (almost) same during the last several weeks.

I don't have a patch -- only ever had good intentions of working
on it in my Copious Free Time.  But since we're going to have to
make a preemptible teardown anyway, I was happy to let this pass on
the grounds that it would get folded into the preemptible version in
time. 

> Plus
> their count multiplies by the number of domains managed (arguably
> that count should be greater than 1 only for non-Dom0 control
> domains, and Dom0 won't make it here anyway - which raises the
> question whether this change is of any practical use under the
> topic on the patch series, i.e. enabling PVH Dom0).

I think this is my fault again -- under the same rules of refcounting
hygiene that demand the ref get/put go right beside the datastructure
update, I asked Mukesh to make sure that the teardown operation
correctly dismantled its refcounts.  

Tim.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-17 12:36         ` Tim Deegan
@ 2014-04-17 13:58           ` Jan Beulich
  2014-04-19  0:59             ` Mukesh Rathor
  2014-04-24  2:21             ` Mukesh Rathor
  0 siblings, 2 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-17 13:58 UTC (permalink / raw)
  To: Tim Deegan; +Cc: George.Dunlap, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
>> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
>> > On Wed, 16 Apr 2014 17:00:35 +0100
>> > "Jan Beulich" <JBeulich@suse.com> wrote:
>> > 
>> >> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
>> >> > In this patch, a new function, p2m_add_foreign(), is added
>> >> > to map pages from foreign guest into dom0 for domU creation.
>> >> > Such pages are typed p2m_map_foreign.  Note, it is the nature
>> >> > of such pages that a refcnt is held during their stay in the p2m.
>> >> > The refcnt is added and released in the low level ept function
>> >> > atomic_write_ept_entry for convenience.
>> >> > Also, p2m_teardown is changed to add a call to allow for the
>> >> > cleanup of foreign pages during its destruction.
>> >> > Finally, get_pg_owner is changed to allow pvh to map foreign
>> >> > mappings, and is made public to be used by p2m_add_foreign().
>> >> 
>> >> Do you really need to do this at this low a layer? I'm afraid that's
>> >> going to be rather limiting when wanting to use the bits used for the
>> >> p2m type for different purposes in intermediate table entries. I.e. I
>> >> think this special casing should only be done for leaf entries, and
>> >> hence at least one layer further up, or introduce something named
>> >> e.g. atomic_write_epte_leaf().
>> > 
>> > Well, Tim and I went back and forth several times on this over the
>> > last several months (you were cc'd :) ). 
>> 
>> I know, but having worked a lot on the P2M code recently my
>> perspective may have changed.
> 
> [I'm assuming the objection here is to having the refcounts updated
> in atomic_write_ept_entry, which was the change I requested.]
> My opinion is still very strongly that reference counting must be done
> when the entries change.  Trying to get this kind of thing right in
> the callers is an _enormous_ PITA, as I learned working on the shadow
> pagetables.  It would get very messy (see, e.g. the myriad places
> where p2m op callers individually check for paged/shared entries) and
> it'd be nigh impossible to debug where in several hours of operation
> something changed a p2m entry from foreign to something else without
> dropping a page ref.
> 
> That said, it should be easy enough only to refcount on leaf entries,
> right?  I can't see how that would be incompatible with the
> intermediate-node changes that Jan is working on.

Right - keeping the macro as is and introducing a derived function to
handle the extra requirements on leaf entries would seem quite okay,
so long as error propagation can be done properly.

>> Plus
>> their count multiplies by the number of domains managed (arguably
>> that count should be greater than 1 only for non-Dom0 control
>> domains, and Dom0 won't make it here anyway - which raises the
>> question whether this change is of any practical use under the
>> topic of the patch series, i.e. enabling PVH Dom0).
> 
> I think this is my fault again -- under the same rules of refcounting
> hygiene that demand the ref get/put go right beside the datastructure
> update, I asked Mukesh to make sure that the teardown operation
> correctly dismantled its refcounts.  

Which is still fine as a concept, just how it is being carried out is a
no-go imo. Yet since you can't get at each individual page table leaf
without traversing the page tables, and since traversing the page
tables is expensive, finding a different solution would still seem
preferable. One option besides the one mentioned before might be to
simply change the whole guest address space to PoD (ideally at 1G
granularity, if only PoD supported those) - that ought to cause the
references to be dropped via the mechanism above.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-17 13:58           ` Jan Beulich
@ 2014-04-19  0:59             ` Mukesh Rathor
  2014-04-21 16:10               ` Jan Beulich
  2014-04-24  2:21             ` Mukesh Rathor
  1 sibling, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-19  0:59 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Thu, 17 Apr 2014 14:58:42 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> >> > On Wed, 16 Apr 2014 17:00:35 +0100
> >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> > 
.....
> > [I'm assuming the objection here is to having the refcounts updated
> > in atomic_write_ept_entry, which was the change I requested.]
> > My opinion is still very strongly that reference counting must be
> > done when the entries change.  Trying to get this kind of thing
> > right in the callers is an _enormous_ PITA, as I learned working on
> > the shadow pagetables.  It would get very messy (see, e.g. the
> > myriad places where p2m op callers individually check for
> > paged/shared entries) and it'd be nigh impossible to debug where in
> > several hours of operation something changed a p2m entry from
> > foreign to something else without dropping a page ref.
> > 
> > That said, it should be easy enough only to refcount on leaf
> > entries, right?  I can't see how that would be incompatible with the
> > intermediate-node changes that Jan is working on.
> 
> Right - keeping the macro as is and introducing a derived function to
> handle the extra requirements on leaf entries would seem quite okay,
> so long as error propagation can be done properly.

Ok, that's doable.

> >> Plus
> >> their count multiplies by the number of domains managed (arguably
> >> that count should be greater than 1 only for non-Dom0 control
> >> domains, and Dom0 won't make it here anyway - which raises the
> >> question whether this change is of any practical use under the
> >> topic of the patch series, i.e. enabling PVH Dom0).

Which is why it was not part of the series, but was only added after a
nak from Tim. Anyway, I understand and respect where he was coming
from. My understanding was that the teardown would become preemptible
soon, and even if not, the overhead is not that critical since it is
only relevant when dom0 or a control domain crashes while it has
foreign pages mapped.

I am happy to drop this change from this dom0 series; it can be done
when PVH control domain support gets added. The Oracle product doesn't
use control domains, so I'm not all that familiar with them at present.

To your other comment: 
>Why can't this be done as pages get de-allocated (which is a
>preemptable process already)?

We are looking for foreign entries in the current privileged domain's
p2m and want to release the refcnt on them. I don't understand which
pages being de-allocated we would look at - do you mean p2m leaf pages?

thanks,
Mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-19  0:59             ` Mukesh Rathor
@ 2014-04-21 16:10               ` Jan Beulich
  0 siblings, 0 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-21 16:10 UTC (permalink / raw)
  To: mukesh.rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> Mukesh Rathor <mukesh.rathor@oracle.com> 04/19/14 3:00 AM >>>
>On Thu, 17 Apr 2014 14:58:42 +0100
>"Jan Beulich" <JBeulich@suse.com> wrote:
>>Why can't this be done as pages get de-allocated (which is a
>>preemptable process already)?
>
>We are looking for foreign entries in the current privileged domain's
>p2m and wanting to release refcnt on them. I don't understand which
>pages being de-allocated we would look at? You mean p2m leaf pages?

Whenever a last-level P2M page gets released, it could be inspected for holding
foreign mappings, and the respective page refs could be released at that point.
But I say this without having looked at how cumbersome that might end up being...
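
To illustrate the idea (sketch only - the helper name is made up, and
it assumes foreign entries only ever exist at 4k granularity):

    /* When releasing a last-level EPT table page, drop the page refs
     * still held by any foreign mappings in it. */
    static void drop_foreign_refs(struct page_info *pg)
    {
        ept_entry_t *table = __map_domain_page(pg);
        unsigned int i;

        for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
            if ( p2m_is_foreign(table[i].sa_p2mt) &&
                 mfn_valid(table[i].mfn) )
                put_page(mfn_to_page(table[i].mfn));

        unmap_domain_page(table);
    }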

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-17  1:37     ` Mukesh Rathor
  2014-04-17  6:50       ` Jan Beulich
@ 2014-04-22  0:19       ` Mukesh Rathor
  2014-04-22  7:28         ` Jan Beulich
  1 sibling, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-22  0:19 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Wed, 16 Apr 2014 18:37:42 -0700
Mukesh Rathor <mukesh.rathor@oracle.com> wrote:

> On Wed, 16 Apr 2014 17:00:35 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
............

> 
> > > +        goto out;
> > > +
> > > +    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
> > > +    if ( rc )
> > > +        goto out;
> > > +
> > > +    /* following will take a refcnt on the mfn */
> > > +    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> > > +    if ( !page || !p2m_is_valid(p2mt) )
> > 
> > Is this really p2m_is_valid() (i.e. including various types apart
> > from p2m_ram_rw)?
> 
> Hmm.. let me check again in case of HVM guest coming up on PVH dom0,
> if there are use cases of non-ram types. Otherwise, it could be just
> p2m_is_ram.

Ok, the following is the change I'm going to make in the next version
for the above comment:

diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 67aa5f6..8d3eb95 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1799,7 +1799,7 @@ int p2m_add_foreign(struct domain *tdom, unsigned long fgf
 
     /* following will take a refcnt on the mfn */
     page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
-    if ( !page || !p2m_is_valid(p2mt) )
+    if ( !page || !p2m_is_strict_ram(p2mt) )
     {
         if ( page )
             put_page(page);
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 47604da..e0bf6dc 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -165,6 +165,7 @@ typedef unsigned int p2m_query_t;
 
 /* Useful predicates */
+#define p2m_is_strict_ram(_t) (p2m_to_mask(_t) & p2m_to_mask(p2m_ram_rw))
 #define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
 #define p2m_is_hole(_t) (p2m_to_mask(_t) & P2M_HOLE_TYPES)
 #define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)


Lmk if you have issues with introducing p2m_is_strict_ram; in that
case, we can just use p2m_is_ram.

thanks
Mukesh

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-17  6:54       ` Jan Beulich
@ 2014-04-22  0:59         ` Mukesh Rathor
  2014-04-22  7:33           ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-22  0:59 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Thu, 17 Apr 2014 07:54:55 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 17.04.14 at 03:44, <mukesh.rathor@oracle.com> wrote:
> > On Wed, 16 Apr 2014 17:05:57 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > 
> >> >>> On 16.04.14 at 02:12, <mukesh.rathor@oracle.com> wrote:
> >> > pvh doesn't use apic emulation, as a result vioapic_init is not
> >> > called and vioapic ptr in struct hvm_domain is not initialized.
> >> > One path that would access the ptr for pvh is :
> >> > 
> >> >    hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
> >> >        hvm_mmio_intercept -> vioapic_range
> >> 
> >> Given this I'm not sure the guard belongs here. The majority of the
> >> handle_mmio() logic should never be used for Dom0. Perhaps you
> >> should simply have a pvh_mmio_handlers[] paralleling
> >> hvm_mmio_handlers[], but (presumably) only having HPET and MSI-X
> >> entries for now?
> > 
> > Well, there's already talk of adding vioapic support for PVH so it
> > could take advantage of the new features coming up. So, it'll probably
> > converge in the near future with hvm_mmio_handlers. I'm ok either way.
> 
> From a conceptual pov I still think the separation of emulation paths
> should happen earlier and/or be more explicit, not the least because
> iirc PVH guests are expected to not have a qemu associated.

Correct. I've got the following for the next version:

diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c
index 7cc13b5..f89be28 100644
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -42,6 +42,16 @@ hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
     &iommu_mmio_handler
 };

+static const struct hvm_mmio_handler *const
+pvh_mmio_handlers[HVM_MMIO_HANDLER_NR] =
+{
+    &hpet_mmio_handler,
+    NULL,
+    NULL,
+    &msixtbl_mmio_handler,
+    NULL,
+};
+
 static int hvm_mmio_access(struct vcpu *v,
                            ioreq_t *p,
                            hvm_mmio_read_t read_handler,
@@ -169,11 +179,13 @@ int hvm_mmio_intercept(ioreq_t *p)
     int i;

     for ( i = 0; i < HVM_MMIO_HANDLER_NR; i++ )
-        if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) )
-            return hvm_mmio_access(
-                v, p,
-                hvm_mmio_handlers[i]->read_handler,
-                hvm_mmio_handlers[i]->write_handler);
+    {
+        const struct hvm_mmio_handler *mmio_handler = hvm_mmio_handlers[i];
+
+        if ( mmio_handler && mmio_handler->check_handler(v, p->addr) )
+            return hvm_mmio_access(v, p, mmio_handler->read_handler,
+                                   mmio_handler->write_handler);
+    }

     return X86EMUL_UNHANDLEABLE;
 }



> That aside - why is this coming up only now? The emulation path
> getting reached shouldn't really depend on Dom0 vs Domu?

The io emulation is handled by handle_pvh_io; there shouldn't be a
path for a pvh domu leading to this function, with all the
restrictions and limitations it has at present.

thanks
mukesh

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-22  0:19       ` Mukesh Rathor
@ 2014-04-22  7:28         ` Jan Beulich
  2014-04-23  0:28           ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-22  7:28 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 22.04.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
> On Wed, 16 Apr 2014 18:37:42 -0700
> Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
> 
>> On Wed, 16 Apr 2014 17:00:35 +0100
>> "Jan Beulich" <JBeulich@suse.com> wrote:
>> 
> ............
> 
>> 
>> > > +        goto out;
>> > > +
>> > > +    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
>> > > +    if ( rc )
>> > > +        goto out;
>> > > +
>> > > +    /* following will take a refcnt on the mfn */
>> > > +    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
>> > > +    if ( !page || !p2m_is_valid(p2mt) )
>> > 
>> > Is this really p2m_is_valid() (i.e. including various types apart
>> > from p2m_ram_rw)?
>> 
>> Hmm.. let me check again in case of HVM guest coming up on PVH dom0,
>> if there are use cases of non-ram types. Otherwise, it could be just
>> p2m_is_ram.
> 
> Ok, the following is the change I'm going to make in the next version
> for the above comment:
> 
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index 67aa5f6..8d3eb95 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -1799,7 +1799,7 @@ int p2m_add_foreign(struct domain *tdom, unsigned long fgf
>  
>      /* following will take a refcnt on the mfn */
>      page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> -    if ( !page || !p2m_is_valid(p2mt) )
> +    if ( !page || !p2m_is_strict_ram(p2mt) )
>      {
>          if ( page )
>              put_page(page);
> diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
> index 47604da..e0bf6dc 100644
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -165,6 +165,7 @@ typedef unsigned int p2m_query_t;
>  
>  /* Useful predicates */
> +#define p2m_is_strict_ram(_t) (p2m_to_mask(_t) & p2m_to_mask(p2m_ram_rw))

Not sure - this now precludes mapping log-dirty or r/o pages, i.e. I
think you went from too relaxed to too strict. I think the question
needs answering in a more abstract fashion first, i.e. without code
written immediately: Which pages absolutely need to be foreign-
mappable and which ones can safely get foreign-mapped. My first
thought would be P2M_RAM_TYPES & ~P2M_HOLE_TYPES, with
P2M_HOLE_TYPES contained in P2M_RAM_TYPES generating a kind
of -EAGAIN notification while triggering the re-creation of the page
(which already is an indication that instead of P2M_HOLE_TYPES it
might well need to be P2M_PAGING_TYPES).

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-22  0:59         ` Mukesh Rathor
@ 2014-04-22  7:33           ` Jan Beulich
  2014-04-23  0:11             ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-22  7:33 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
> On Thu, 17 Apr 2014 07:54:55 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
>> From a conceptual pov I still think the separation of emulation paths
>> should happen earlier and/or be more explicit, not the least because
>> iirc PVH guests are expected to not have a qemu associated.
> 
> Correct. I've got the following for the next version:
> 
> diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c
> index 7cc13b5..f89be28 100644
> --- a/xen/arch/x86/hvm/intercept.c
> +++ b/xen/arch/x86/hvm/intercept.c
> @@ -42,6 +42,16 @@ hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
>      &iommu_mmio_handler
>  };
> 
> +static const struct hvm_mmio_handler *const
> +pvh_mmio_handlers[HVM_MMIO_HANDLER_NR] =
> +{
> +    &hpet_mmio_handler,
> +    NULL,
> +    NULL,
> +    &msixtbl_mmio_handler,
> +    NULL,
> +};
> +
>  static int hvm_mmio_access(struct vcpu *v,
>                             ioreq_t *p,
>                             hvm_mmio_read_t read_handler,
> @@ -169,11 +179,13 @@ int hvm_mmio_intercept(ioreq_t *p)
>      int i;
> 
>      for ( i = 0; i < HVM_MMIO_HANDLER_NR; i++ )
> -        if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) )
> -            return hvm_mmio_access(
> -                v, p,
> -                hvm_mmio_handlers[i]->read_handler,
> -                hvm_mmio_handlers[i]->write_handler);
> +    {
> +        const struct hvm_mmio_handler *mmio_handler = hvm_mmio_handlers[i];
> +
> +        if ( mmio_handler && mmio_handler->check_handler(v, p->addr) )
> +            return hvm_mmio_access(v, p, mmio_handler->read_handler,
> +                                   mmio_handler->write_handler);
> +    }
> 
>      return X86EMUL_UNHANDLEABLE;
>  }

I think I'm getting the idea, but the code neither references
pvh_mmio_handlers[], nor is that array's initialization well done
(should be using .<field> = <value> style instead, omitting the
NULLs).

>> That aside - why is this coming up only now? The emulation path
>> getting reached shouldn't really depend on Dom0 vs Domu?
> 
> The io emulation is handled by handle_pvh_io; there shouldn't be a
> path for a pvh domu leading to this function, with all the
> restrictions and limitations it has at present.

In which case we're back to the initial question: Why is this patch
needed in the first place? If there is a separate emulation path already,
how do we manage to get to the point where you added the extra
check?

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-22  7:33           ` Jan Beulich
@ 2014-04-23  0:11             ` Mukesh Rathor
  2014-04-23  9:07               ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-23  0:11 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Tue, 22 Apr 2014 08:33:29 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
> > On Thu, 17 Apr 2014 07:54:55 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
.......
> I think I'm getting the idea, but the code neither references
> pvh_mmio_handlers[], nor is that array's initialization well done
> (should be using .<field> = <value> style instead, omitting the
> NULLs).

Oops, forgot to add the is_pvh check.
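
Something like the following, perhaps (sketch only - the array indices
are assumed to mirror the hvm_mmio_handlers[] ordering in the earlier
hunk):

    static const struct hvm_mmio_handler *const
    pvh_mmio_handlers[HVM_MMIO_HANDLER_NR] =
    {
        [0] = &hpet_mmio_handler,
        [3] = &msixtbl_mmio_handler,
    };

and in hvm_mmio_intercept():

    const struct hvm_mmio_handler *const *handlers =
        is_pvh_vcpu(v) ? pvh_mmio_handlers : hvm_mmio_handlers;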

> >> That aside - why is this coming up only now? The emulation path
> >> getting reached shouldn't really depend on Dom0 vs Domu?
> > 
> > The io emulation is handled by handle_pvh_io; there shouldn't be a
> > path for a pvh domu leading to this function, with all the
> > restrictions and limitations it has at present.
> 
> In which case we're back to the initial question: Why is this patch
> needed in the first place? If there is a separate emulation path
> already, how do we manage to get to the point where you added the
> extra check?

As described in the patch description:

-----
pvh doesn't use apic emulation, as a result vioapic_init is not called
and vioapic ptr in struct hvm_domain is not initialized. One path that
would access the ptr for pvh is :

   hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
          hvm_mmio_intercept -> vioapic_range
-----

The only caller of hvm_hap_nested_page_fault is ept_handle_violation.

Now, there are 3 calls to handle_mmio in hvm_hap_nested_page_fault:
  The 1st is for a nested vcpu, so doesn't apply to PVH.
  The 2nd has an is_hvm check.

  So it must have been the third one where I observed the
  vioapic_range crash a while ago, and made a note of it. Looking
  at it:

    if ( (p2mt == p2m_mmio_dm) ||
         (access_w && (p2mt == p2m_ram_ro)) )
    {
        put_gfn(p2m->domain, gfn);
        if ( !handle_mmio() )

doesn't seem to apply to a domu. Unfortunately, I can't reproduce it
now, so maybe it was an ept violation due to some bug, and a crash in
vioapic_range before ept_handle_violation could print the gfn/mfns etc
made me make a note to put a check in.

Hope that makes sense, and I'll assume you are ok with the
pvh_mmio_handlers[] change. Otherwise, please lmk.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-22  7:28         ` Jan Beulich
@ 2014-04-23  0:28           ` Mukesh Rathor
  2014-04-23  9:03             ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-23  0:28 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Tue, 22 Apr 2014 08:28:45 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 22.04.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
> > On Wed, 16 Apr 2014 18:37:42 -0700
> > Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
> > 
> >> On Wed, 16 Apr 2014 17:00:35 +0100
> >> "Jan Beulich" <JBeulich@suse.com> wrote:
> >> 
............

> >      /* following will take a refcnt on the mfn */
> >      page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> > -    if ( !page || !p2m_is_valid(p2mt) )
> > +    if ( !page || !p2m_is_strict_ram(p2mt) )
> >      {
> >          if ( page )
> >              put_page(page);
> > diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
> > index 47604da..e0bf6dc 100644
> > --- a/xen/include/asm-x86/p2m.h
> > +++ b/xen/include/asm-x86/p2m.h
> > @@ -165,6 +165,7 @@ typedef unsigned int p2m_query_t;
> >  
> >  /* Useful predicates */
> > +#define p2m_is_strict_ram(_t) (p2m_to_mask(_t) &
> > p2m_to_mask(p2m_ram_rw))
> 
> Not sure - this now precludes mapping log-dirty or r/o pages, i.e. I
> think you went from too relaxed to too strict. I think the question
> needs answering in a more abstract fashion first, i.e. without code
> written immediately: Which pages absolutely need to be foreign-
> mappable and which ones can safely get foreign-mapped. My first
> thought would be P2M_RAM_TYPES & ~P2M_HOLE_TYPES, with
> P2M_HOLE_TYPES contained in P2M_RAM_TYPES generating a kind
> of -EAGAIN notification while triggering the re-creation of the page
> (which already is an indication that instead of P2M_HOLE_TYPES it
> might well need to be P2M_PAGING_TYPES).

The current use case is p2m_ram_rw; my approach would be to start with
the minimum and add more as needed when features like migration,
passthru, etc are added. My understanding of the various p2m types is
limited, so I am at your mercy here to advise what types to anticipate.
IMO, just checking for P2M_RAM_TYPES might suffice; I can additionally
add a check to make sure it's not one of P2M_HOLE_TYPES. Please lmk.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-23  0:28           ` Mukesh Rathor
@ 2014-04-23  9:03             ` Jan Beulich
  2014-04-23 16:13               ` Andres Lagar-Cavilla
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-23  9:03 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, Andres Lagar-Cavilla,
	jun.nakajima, xen-devel

>>> On 23.04.14 at 02:28, <mukesh.rathor@oracle.com> wrote:
> On Tue, 22 Apr 2014 08:28:45 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> >>> On 22.04.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
>> > On Wed, 16 Apr 2014 18:37:42 -0700
>> > Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
>> > 
>> >> On Wed, 16 Apr 2014 17:00:35 +0100
>> >> "Jan Beulich" <JBeulich@suse.com> wrote:
>> >> 
> ............
> 
>> >      /* following will take a refcnt on the mfn */
>> >      page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
>> > -    if ( !page || !p2m_is_valid(p2mt) )
>> > +    if ( !page || !p2m_is_strict_ram(p2mt) )
>> >      {
>> >          if ( page )
>> >              put_page(page);
>> > diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
>> > index 47604da..e0bf6dc 100644
>> > --- a/xen/include/asm-x86/p2m.h
>> > +++ b/xen/include/asm-x86/p2m.h
>> > @@ -165,6 +165,7 @@ typedef unsigned int p2m_query_t;
>> >  
>> >  /* Useful predicates */
>> > +#define p2m_is_strict_ram(_t) (p2m_to_mask(_t) &
>> > p2m_to_mask(p2m_ram_rw))
>> 
>> Not sure - this now precludes mapping log-dirty or r/o pages, i.e. I
>> think you went from too relaxed to too strict. I think the question
>> needs answering in a more abstract fashion first, i.e. without code
>> written immediately: Which pages absolutely need to be foreign-
>> mappable and which ones can safely get foreign-mapped. My first
>> thought would be P2M_RAM_TYPES & ~P2M_HOLE_TYPES, with
>> P2M_HOLE_TYPES contained in P2M_RAM_TYPES generating a kind
>> of -EAGAIN notification while triggering the re-creation of the page
>> (which already is an indication that instead of P2M_HOLE_TYPES it
>> might well need to be P2M_PAGING_TYPES).
> 
> The current use case is p2m_ram_rw; my approach would be to start with
> the minimum and add more as needed when features like migration,
> passthru, etc are added. My understanding of the various p2m types is
> limited, so I am at your mercy here to advise what types to anticipate.
> IMO, just checking for P2M_RAM_TYPES might suffice; I can additionally
> add a check to make sure it's not one of P2M_HOLE_TYPES. Please lmk.

It's clear that p2m_ram_rw is too narrow: p2m_ram_ro and
p2m_ram_logdirty will minimally also need accepting here. As for the
paging/paged and shared types, I have to defer to the respective
maintainers - Andres, Tim?
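
That minimal widening, expressed as a predicate (sketch only, not a
concrete proposal), in place of the p2m_is_strict_ram one proposed
earlier:

    #define p2m_is_foreign_mappable(_t)                 \
        (p2m_to_mask(_t) & (p2m_to_mask(p2m_ram_rw) |   \
                            p2m_to_mask(p2m_ram_ro) |   \
                            p2m_to_mask(p2m_ram_logdirty)))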

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-23  0:11             ` Mukesh Rathor
@ 2014-04-23  9:07               ` Jan Beulich
  2014-04-23 21:18                 ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-23  9:07 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 23.04.14 at 02:11, <mukesh.rathor@oracle.com> wrote:
> On Tue, 22 Apr 2014 08:33:29 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
>> >>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
>> > The io emulation is handled by handle_pvh_io; there shouldn't be a
>> > path for a pvh domu leading to this function, with all the
>> > restrictions and limitations it has at present.
>> 
>> In which case we're back to the initial question: Why is this patch
>> needed in the first place? If there is a separate emulation path
>> already, how do we manage to get to the point where you added the
>> extra check?
> 
> As described in the patch description:
> 
> -----
> pvh doesn't use apic emulation, as a result vioapic_init is not called
> and vioapic ptr in struct hvm_domain is not initialized. One path that
> would access the ptr for pvh is :
> 
>    hvm_hap_nested_page_fault -> handle_mmio -> hvmemul_do_io ->
>           hvm_mmio_intercept -> vioapic_range

So in your earlier reply (still quoted above) you said emulation gets
handled by handle_pvh_io(), yet here you point out a path going
through handle_mmio(). That's all very inconsistent.

> The only caller of hvm_hap_nested_page_fault is ept_handle_violation.
> 
> Now, 3 calls to handle_mmio in hvm_hap_nested_page_fault:
>   1st is for nested vcpu, so doesn't apply to PVH.
>   2nd has is_hvm check, 
> 
>   So it must have been the third one that I had observed the
>   vioapic_range crash in a while ago, and had made note of it. Looking
>   at it:
> 
>     if ( (p2mt == p2m_mmio_dm) ||
>          (access_w && (p2mt == p2m_ram_ro)) )
>     {
>         put_gfn(p2m->domain, gfn);
>         if ( !handle_mmio() )
> 
> doesn't seem to apply to a domu. Unfortunately, I can't reproduce it
> now, so maybe it was an ept violation due to some bug, and a crash in
> vioapic_range before ept_handle_violation could print the gfn/mfns etc
> made me make a note to put a check in.

Which makes me think that we don't need the patch at all.

> Hope that makes sense, and I'll assume you are ok with
> pvh_mmio_handlers[] change. Otherwise, please lmk.

I would be fine with it if it was proven that a change here is needed
at all.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-23  9:03             ` Jan Beulich
@ 2014-04-23 16:13               ` Andres Lagar-Cavilla
  2014-04-24 16:37                 ` Tim Deegan
  0 siblings, 1 reply; 58+ messages in thread
From: Andres Lagar-Cavilla @ 2014-04-23 16:13 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, Andres Lagar-Cavilla,
	jun.nakajima, xen-devel


On Apr 23, 2014, at 5:03 AM, "Jan Beulich" <JBeulich@suse.com> wrote:

>>>> On 23.04.14 at 02:28, <mukesh.rathor@oracle.com> wrote:
>> On Tue, 22 Apr 2014 08:28:45 +0100
>> "Jan Beulich" <JBeulich@suse.com> wrote:
>> 
>>>>>> On 22.04.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
>>>> On Wed, 16 Apr 2014 18:37:42 -0700
>>>> Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
>>>> 
>>>>> On Wed, 16 Apr 2014 17:00:35 +0100
>>>>> "Jan Beulich" <JBeulich@suse.com> wrote:
>>>>> 
>> ............
>> 
>>>>     /* following will take a refcnt on the mfn */
>>>>     page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
>>>> -    if ( !page || !p2m_is_valid(p2mt) )
>>>> +    if ( !page || !p2m_is_strict_ram(p2mt) )
>>>>     {
>>>>         if ( page )
>>>>             put_page(page);
>>>> diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
>>>> index 47604da..e0bf6dc 100644
>>>> --- a/xen/include/asm-x86/p2m.h
>>>> +++ b/xen/include/asm-x86/p2m.h
>>>> @@ -165,6 +165,7 @@ typedef unsigned int p2m_query_t;
>>>> 
>>>> /* Useful predicates */
>>>> +#define p2m_is_strict_ram(_t) (p2m_to_mask(_t) &
>>>> p2m_to_mask(p2m_ram_rw))
>>> 
>>> Not sure - this now precludes mapping log-dirty or r/o pages, i.e. I
>>> think you went from too relaxed to too strict. I think the question
>>> needs answering in a more abstract fashion first, i.e. without code
>>> written immediately: Which pages absolutely need to be foreign-
>>> mappable and which ones can safely get foreign-mapped. My first
>>> thought would be P2M_RAM_TYPES & ~P2M_HOLE_TYPES, with
>>> P2M_HOLE_TYPES contained in P2M_RAM_TYPES generating a kind
>>> of -EAGAIN notification while triggering the re-creation of the page
>>> (which already is an indication that instead of P2M_HOLE_TYPES it
>>> might well need to be P2M_PAGING_TYPES).
>> 
>> The current use case is p2m_ram_rw; my approach would be to start with
>> the minimum and add more as needed when features like migration,
>> passthru, etc are added. My understanding of the various p2m types is
>> limited, so I am at your mercy here to advise what types to anticipate.
>> IMO, just checking for P2M_RAM_TYPES might suffice; I can additionally
>> add a check to make sure it's not one of P2M_HOLE_TYPES. Please lmk.
> 
> It's clear that p2m_ram_rw is too narrow: p2m_ram_ro and
> p2m_ram_logdirty will minimally also need accepting here. As for the
> paging/paged and shared types, I have to defer to the respective
> maintainers - Andres, Tim?
Not entirely sure of the context, hence:

If you need types that represent p2m backed by actual host pages with up-to-date contents, although possibly with limited permissions:
_ram_rw | _ram_ro | _ram_logdirty | _ram_paging_out | _ram_shared

If you need types that represent p2m that after fault resolution will be backed by actual host pages with up-to-date contents, add:
_populate_on_demand | _ram_paged | _ram_paging_in

Consider also the _grant_map_r[w|o] types (they also fall under the first category).
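
For concreteness, the first list above as a type mask (sketch only,
using the p2m_to_mask() helper from p2m.h):

    #define P2M_FOREIGN_MAPPABLE                               \
        (p2m_to_mask(p2m_ram_rw) | p2m_to_mask(p2m_ram_ro) |   \
         p2m_to_mask(p2m_ram_logdirty) |                       \
         p2m_to_mask(p2m_ram_paging_out) |                     \
         p2m_to_mask(p2m_ram_shared))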

Andres

> 
> Jan
> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-23  9:07               ` Jan Beulich
@ 2014-04-23 21:18                 ` Mukesh Rathor
  2014-04-24  6:49                   ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-23 21:18 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Wed, 23 Apr 2014 10:07:25 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 23.04.14 at 02:11, <mukesh.rathor@oracle.com> wrote:
> > On Tue, 22 Apr 2014 08:33:29 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> >>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
......

> >   So it must have been the third one that I had observed the
> >   vioapic_range crash in a while ago, and had made note of it.
> > Looking at it:
> > 
> >     if ( (p2mt == p2m_mmio_dm) ||
> >          (access_w && (p2mt == p2m_ram_ro)) )
> >     {
> >         put_gfn(p2m->domain, gfn);
> >         if ( !handle_mmio() )
> > 
> > doesn't seem to apply to a domu. Unfortunately, I can't reproduce it
> > now, so maybe it was an ept violation due to some bug, and a crash in
> > vioapic_range before ept_handle_violation could print the
> > gfn/mfns etc made me make a note to put a check in.
> 
> Which makes me think that we don't need the patch at all.

Well, without this patch, in case of a dom0 EPT violation, dom0 will
not die gracefully printing the gfn/mfn/etc info; instead it will
show a fault in vioapic_range.


ept_handle_violation() 
       hvm_hap_nested_page_fault()
             -> handle_mmio() -----> vioapic_range() : KABOOM!!

       gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
                    "gpa %#"PRIpaddr", mfn %#lx, type %i.\n",
                                 qualification,  <=== NOT REACHED
          .......

I can submit it later too, I guess. But without it, we'd not know
about the ept violation crashes.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-17 13:58           ` Jan Beulich
  2014-04-19  0:59             ` Mukesh Rathor
@ 2014-04-24  2:21             ` Mukesh Rathor
  2014-04-24  6:44               ` Jan Beulich
  2014-04-24  9:46               ` Tim Deegan
  1 sibling, 2 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-24  2:21 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Thu, 17 Apr 2014 14:58:42 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> >> > On Wed, 16 Apr 2014 17:00:35 +0100
> >> > "Jan Beulich" <JBeulich@suse.com> wrote:
.......
> >> > Well, Tim and I went back and forth several times on this over
> >> > the last several months (you were cc'd :) ). 
> >> 
> >> I know, but having worked a lot on the P2M code recently my
> >> perspective may have changed.
> > 
> > [I'm assuming the objection here is to having the refcounts updated
> > in atomic_write_ept_entry, which was the change I requested.]
> > My opinion is still very strongly that reference counting must be
> > done when the entries change.  Trying to get this kind of thing
> > right in the callers is an _enormous_ PITA, as I learned working on
> > the shadow pagetables.  It would get very messy (see, e.g. the
> > myriad places where p2m op callers individually check for
> > paged/shared entries) and it'd be nigh impossible to debug where in
> > several hours of operation something changed a p2m entry from
> > foreign to something else without dropping a page ref.
> > 
> > That said, it should be easy enough only to refcount on leaf
> > entries, right?  I can't see how that would be incompatible with the
> > intermediate-node changes that Jan is working on.
> 
> Right - keeping the macro as is and introducing a derived function to
> handle the extra requirements on leaf entries would seem quite okay,
> so long as error propagation can be done properly.

Ok, how about something like the following? In case of get_page failure,
not sure EINVAL is the best one to return, EBUSY?

thanks
mukesh


diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 1fa839a..db2fa3a 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -403,6 +403,21 @@ bool_t ept_handle_misconfig(uint64_t gpa)
     return !!okay;
 }
 
+static int ept_get_foreign_refcnt(mfn_t mfn)
+{
+    struct domain *fdom;
+
+    if ( !mfn_valid(mfn_x(mfn)) )
+        return -EINVAL;
+
+    fdom = page_get_owner(mfn_to_page(mfn_x(mfn)));
+    if ( fdom == NULL )
+        return -ESRCH;
+
+    /* get refcount on the page */
+    if ( !get_page(mfn_to_page(mfn_x(mfn)), fdom) )
+        return -EINVAL;
+
+    return 0;
+}
+
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
@@ -427,6 +442,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     ept_entry_t new_entry = { .epte = 0 };
     struct ept_data *ept = &p2m->ept;
     struct domain *d = p2m->domain;
+    unsigned long prev_foreign_mfn = INVALID_MFN;
 
     ASSERT(ept);
     /*
@@ -460,6 +476,14 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 
     ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
 
+    /* foreign p2m types must be refcounted before being added */
+    if ( unlikely(p2m_is_foreign(p2mt)) )
+    {
+        rc = ept_get_foreign_refcnt(mfn);
+        if ( rc )
+            goto out;
+    }
+
     ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
 
     /* In case VT-d uses same page table, this flag is needed by VT-d */ 
@@ -545,8 +569,14 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
     }
 
+    if ( unlikely(p2m_is_foreign(ept_entry->sa_p2mt)) )
+        prev_foreign_mfn = ept_entry->mfn;
+
     atomic_write_ept_entry(ept_entry, new_entry);
 
+    if ( unlikely(prev_foreign_mfn != INVALID_MFN) )
+        put_page(mfn_to_page(prev_foreign_mfn));
+
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( p2mt != p2m_invalid &&
          (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
@@ -576,6 +606,10 @@ out:
         }
     }
 
+    /* do cleanup for foreign types in case of error */
+    if ( unlikely(rc && ept_entry && p2m_is_foreign(p2mt)) )
+        put_page(mfn_to_page(mfn_x(mfn)));
+
     /* Release the old intermediate tables, if any.  This has to be the
        last thing we do, after the ept_sync_domain() and removal
        from the iommu tables, so as to avoid a potential

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-24  2:21             ` Mukesh Rathor
@ 2014-04-24  6:44               ` Jan Beulich
  2014-04-24  9:46               ` Tim Deegan
  1 sibling, 0 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-24  6:44 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 24.04.14 at 04:21, <mukesh.rathor@oracle.com> wrote:
> On Thu, 17 Apr 2014 14:58:42 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
>> Right - keeping the macro as is and introducing a derived function to
>> handle the extra requirements on leaf entries would seem quite okay,
>> so long as error propagation can be done properly.
> 
> Ok, how about something like the following?

Looks reasonable, especially if this is indeed the only place where leaf
entries get modified. I suppose there's no path allowing 2M or 1G pages
to be created with foreign type?

> In case of get_page failure,
> not sure EINVAL is the best one to return, EBUSY?

Indeed, -EBUSY seems like the more applicable one here.

> @@ -545,8 +569,14 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>          ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
>      }
>  
> +    if ( unlikely(p2m_is_foreign(ept_entry->sa_p2mt)) )
> +        prev_foreign_mfn = ept_entry->mfn;
> +
>      atomic_write_ept_entry(ept_entry, new_entry);
>  
> +    if ( unlikely(prev_foreign_mfn != INVALID_MFN) )
> +        put_page(mfn_to_page(prev_foreign_mfn));
> +
>      /* Track the highest gfn for which we have ever had a valid mapping */
>      if ( p2mt != p2m_invalid &&
>           (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
> @@ -576,6 +606,10 @@ out:
>          }
>      }
>  
> +    /* do cleanup for foreign types in case of error */
> +    if ( unlikely(rc && ept_entry && p2m_is_foreign(p2mt)) )
> +        put_page(mfn_to_page(mfn_x(mfn)));

Comparing this with the above suggests that type safety rules
would want prev_foreign_mfn to be mfn_t, and this as well as the
code in ept_get_foreign_refcnt() suggest that you may want/
need to add *_to_* overrides similar to the ones near the top of
p2m-pt.c.
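
For reference, the overrides in question look roughly like this near
the top of p2m-pt.c (double-check the file for the exact set):

    /* Override macros from asm/page.h to make them work with mfn_t */
    #undef mfn_to_page
    #define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
    #undef mfn_valid
    #define mfn_valid(mfn) __mfn_valid(mfn_x(mfn))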

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-23 21:18                 ` Mukesh Rathor
@ 2014-04-24  6:49                   ` Jan Beulich
  2014-04-24 23:28                     ` Mukesh Rathor
  2014-05-06  0:19                     ` Mukesh Rathor
  0 siblings, 2 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-24  6:49 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 23.04.14 at 23:18, <mukesh.rathor@oracle.com> wrote:
> On Wed, 23 Apr 2014 10:07:25 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> >>> On 23.04.14 at 02:11, <mukesh.rathor@oracle.com> wrote:
>> > On Tue, 22 Apr 2014 08:33:29 +0100
>> > "Jan Beulich" <JBeulich@suse.com> wrote:
>> >> >>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
> ......
> 
>> >   So it must have been the third one that I had observed the
>> >   vioapic_range crash in a while ago, and had made note of it.
>> > Looking at it:
>> > 
>> >     if ( (p2mt == p2m_mmio_dm) ||
>> >          (access_w && (p2mt == p2m_ram_ro)) )
>> >     {
>> >         put_gfn(p2m->domain, gfn);
>> >         if ( !handle_mmio() )
>> > 
>> > doesn't seem to apply to a domu. Unfortunately, I can't reproduce it
>> > now, so maybe it was an ept violation due to some bug, and a crash in
>> > vioapic_range before ept_handle_violation could print the
>> > gfn/mfns etc made me make a note to put a check in.
>> 
>> Which makes me think that we don't need the patch at all.
> 
> Well, without this patch, in case of a dom0 EPT violation, dom0 will
> not die gracefully printing the gfn/mfn/etc info; instead it will
> show a fault in vioapic_range.
> 
> 
> ept_handle_violation() 
>        hvm_hap_nested_page_fault()
>              -> handle_mmio() -----> vioapic_range() : KABOOM!!
> 
>        gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
>                     "gpa %#"PRIpaddr", mfn %#lx, type %i.\n",
>                                  qualification,  <=== NOT REACHED
>           .......
> 
> I can submit it later too, I guess. But without it, we'd not know
> about the ept violation crashes.

So we're moving in circles I'm afraid: You told us that I/O emulation
is being handled by a separate path from HVM's, which means either
handle_mmio() separates the cases itself, or doesn't even get
called, only to then again show us the call sequence above. One
of the two statements can't be correct, and what I'd like you to do
about it depends on which one it is.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-24  2:21             ` Mukesh Rathor
  2014-04-24  6:44               ` Jan Beulich
@ 2014-04-24  9:46               ` Tim Deegan
  2014-04-25  2:09                 ` Mukesh Rathor
  1 sibling, 1 reply; 58+ messages in thread
From: Tim Deegan @ 2014-04-24  9:46 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: Jan Beulich, George.Dunlap, eddie.dong, keir.xen, jun.nakajima,
	xen-devel

At 19:21 -0700 on 23 Apr (1398277311), Mukesh Rathor wrote:
> On Thu, 17 Apr 2014 14:58:42 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
> > >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> > >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> > >> > On Wed, 16 Apr 2014 17:00:35 +0100
> > >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> .......
> > >> > Well, Tim and I went back and forth several times on this over
> > >> > the last several months (you were cc'd :) ). 
> > >> 
> > >> I know, but having worked a lot on the P2M code recently my
> > >> perspective may have changed.
> > > 
> > > [I'm assuming the objection here is to having the refcounts updated
> > > in atomic_write_ept_entry, which was the change I requested.]
> > > My opinion is still very strongly that reference counting must be
> > > done when the entries change.  Trying to get this kind of thing
> > > right in the callers is an _enormous_ PITA, as I learned working on
> > > the shadow pagetables.  It would get very messy (see, e.g. the
> > > myriad places where p2m op callers individually check for
> > > paged/shared entries) and it'd be nigh impossible to debug where in
> > > several hours of operation something changed a p2m entry from
> > > foreign to something else without dropping a page ref.
> > > 
> > > That said, it should be easy enough only to refcount on leaf
> > > entries, right?  I can't see how that would be incompatible with the
> > > intermediate-node changes that Jan is working on.
> > 
> > Right - keeping the macro as is and introducing a derived function to
> > handle the extra requirements on leaf entries would seem quite okay,
> > so long as error propagation can be done properly.
> 
> Ok, how about something like the following? In case of get_page failure,
> not sure EINVAL is the best one to return, EBUSY?

This goes back to having refcounts open-coded.  Having the refcounts
open-coded around the atomic_write_ept_entry() in ept_set_entry()
means there are now places where the epte can change without
maintaining the refcount invariants: ept_change_entry_type_page(), for
example.

I would _much_ prefer to have atomic_write_ept_entry() DTRT -- it
would have to know the difference between leaf and non-leaf entries,
and return an error code.  I'd also be OK with having two
atomic_write ops, one for leaf and one for non-leaf, with appropriate
ASSERT()s on the contents.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-23 16:13               ` Andres Lagar-Cavilla
@ 2014-04-24 16:37                 ` Tim Deegan
  0 siblings, 0 replies; 58+ messages in thread
From: Tim Deegan @ 2014-04-24 16:37 UTC (permalink / raw)
  To: Andres Lagar-Cavilla
  Cc: Jan Beulich, George.Dunlap, eddie.dong, keir.xen,
	Andres Lagar-Cavilla, jun.nakajima, xen-devel

At 12:13 -0400 on 23 Apr (1398251599), Andres Lagar-Cavilla wrote:
> 
> On Apr 23, 2014, at 5:03 AM, "Jan Beulich" <JBeulich@suse.com> wrote:
> 
> >>>> On 23.04.14 at 02:28, <mukesh.rathor@oracle.com> wrote:
> >> On Tue, 22 Apr 2014 08:28:45 +0100
> >> "Jan Beulich" <JBeulich@suse.com> wrote:
> >> 
> >>>>>> On 22.04.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
> >>>> On Wed, 16 Apr 2014 18:37:42 -0700
> >>>> Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
> >>>> 
> >>>>> On Wed, 16 Apr 2014 17:00:35 +0100
> >>>>> "Jan Beulich" <JBeulich@suse.com> wrote:
> >>>>> 
> >> ............
> >> 
> >>>>     /* following will take a refcnt on the mfn */
> >>>>     page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> >>>> -    if ( !page || !p2m_is_valid(p2mt) )
> >>>> +    if ( !page || !p2m_is_strict_ram(p2mt) )
> >>>>     {
> >>>>         if ( page )
> >>>>             put_page(page);
> >>>> diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
> >>>> index 47604da..e0bf6dc 100644
> >>>> --- a/xen/include/asm-x86/p2m.h
> >>>> +++ b/xen/include/asm-x86/p2m.h
> >>>> @@ -165,6 +165,7 @@ typedef unsigned int p2m_query_t;
> >>>> 
> >>>> /* Useful predicates */
> >>>> +#define p2m_is_strict_ram(_t) (p2m_to_mask(_t) &
> >>>> p2m_to_mask(p2m_ram_rw))
> >>> 
> >>> Not sure - this now precludes mapping log-dirty or r/o pages, i.e. I
> >>> think you went from too relaxed to too strict. I think the question
> >>> needs answering in a more abstract fashion first, i.e. without code
> >>> written immediately: Which pages absolutely need to be foreign-
> >>> mappable and which ones can safely get foreign-mapped. My first
> >>> thought would be P2M_RAM_TYPES & ~P2M_HOLE_TYPES, with
> >>> P2M_HOLE_TYPES contained in P2M_RAM_TYPES generating a kind
> >>> of -EAGAIN notification while triggering the re-creation of the page
> >>> (which already is an indication that instead of P2M_HOLE_TYPES it
> >>> might well need to be P2M_PAGING_TYPES).
> >> 
> >> The current use case is p2m_ram_rw; my approach would be to start with
> >> the minimum and add more as needed when features like migration,
> >> passthru, etc are added. My understanding of the various p2m types is
> >> limited, so I am at your mercy here to advise what types to anticipate.
> >> IMO, just checking for P2M_RAM_TYPES might suffice; I can additionally
> >> add a check to make sure it's not one of P2M_HOLE_TYPES. Please lmk.
> > 
> > It's clear that p2m_ram_rw is too narrow: p2m_ram_ro and
> > p2m_ram_logdirty will minimally also need accepting here. As for the
> > paging/paged and shared types, I have to defer to the respective
> > maintainers - Andres, Tim?
> Not entirely sure of the context, hence:
> 
> If you need types that represent p2m backed by actual host pages with up-to-date contents, although possibly with limited permissions:
> _ram_rw | _ram_ro | _ram_logdirty | _ram_paging_out | _ram_shared

That looks OK to me, but with the caveat that you must be very careful
about mapping shared pages!  Needs to be mapped read-only or else
unshared first.

> Consider also the _grant_map_r[w|o] types (they also fall under the first category).

I think we can disallow those, on the grounds that the equivalent PV
operation doesn't handle them either.

Tim.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-24  6:49                   ` Jan Beulich
@ 2014-04-24 23:28                     ` Mukesh Rathor
  2014-05-06  0:19                     ` Mukesh Rathor
  1 sibling, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-24 23:28 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Thu, 24 Apr 2014 07:49:44 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 23.04.14 at 23:18, <mukesh.rathor@oracle.com> wrote:
> > On Wed, 23 Apr 2014 10:07:25 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > 
> >> >>> On 23.04.14 at 02:11, <mukesh.rathor@oracle.com> wrote:
> >> > On Tue, 22 Apr 2014 08:33:29 +0100
> >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> >> >>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
> > ......
> > 
> >> >   So it must have been the third one that I had observed the
> >> >   vioapic_range crash in a while ago, and had made note of it.
> >> > Looking at it:
> >> > 
> >> >     if ( (p2mt == p2m_mmio_dm) ||
> >> >          (access_w && (p2mt == p2m_ram_ro)) )
> >> >     {
> >> >         put_gfn(p2m->domain, gfn);
> >> >         if ( !handle_mmio() )
> >> > 
> >> > doesn't seem to apply to a domu. Unfortunately, I can't reproduce
> >> > it now, so maybe it was an ept violation due to some bug, and a
> >> > crash in vioapic_range before ept_handle_violation could print the
> >> > gfn/mfns etc made me make a note to put a check in.
> >> 
> >> Which makes me think that we don't need the patch at all.
> > 
> > Well, without this patch, in case of a dom0 EPT violation, dom0 will
> > not die gracefully printing the gfn/mfn/etc info; instead it will
> > show a fault in vioapic_range.
> > 
> > 
> > ept_handle_violation() 
> >        hvm_hap_nested_page_fault()
> >              -> handle_mmio() -----> vioapic_range() : KABOOM!!
> > 
> >        gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
> >                     "gpa %#"PRIpaddr", mfn %#lx, type %i.\n",
> >                                  qualification,  <=== NOT REACHED
> >           .......
> > 
> > I can submit it later too I guess. But without it, we'd not know
> > the ept violation crashes.
> 
> So we're moving in circles I'm afraid: You told us that I/O emulation
> is being handled by a separate path from HVM's, which means either
> handle_mmio() separates the cases itself, or doesn't even get
> called, only to then again show us the call sequence above. One

Correct - it doesn't get called other than via the above path, and my
initial patch had put an is_pvh check in hvm_hap_nested_page_fault,
which would avoid the call to handle_mmio. But I understand adding too
many is_pvh checks is undesirable.

I can't reproduce the crash now, so let's just drop this patch for
now.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-24  9:46               ` Tim Deegan
@ 2014-04-25  2:09                 ` Mukesh Rathor
  2014-04-25  6:49                   ` Jan Beulich
                                     ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-25  2:09 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Jan Beulich, George.Dunlap, eddie.dong, keir.xen, jun.nakajima,
	xen-devel

On Thu, 24 Apr 2014 11:46:41 +0200
Tim Deegan <tim@xen.org> wrote:

> At 19:21 -0700 on 23 Apr (1398277311), Mukesh Rathor wrote:
> > On Thu, 17 Apr 2014 14:58:42 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > 
> > > >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > > > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> > > >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> > > >> > On Wed, 16 Apr 2014 17:00:35 +0100
> > > >> > "Jan Beulich" <JBeulich@suse.com> wrote:
......
> > > > That said, it should be easy enough only to refcount on leaf
> > > > entries, right?  I can't see how that would be incompatible
> > > > with the intermediate-node changes that Jan is working on.
> > > 
> > > Right - keeping the macro as is and introducing a derived
> > > function to handle the extra requirements on leaf entries would
> > > seem quite okay, so long as error propagation can be done
> > > properly.
> > 
> > Ok, how about something like the following? In case of get_page
> > failure, I'm not sure EINVAL is the best one to return; EBUSY?
> 
> This goes back to having refcounts open-coded.  Having the refcounts
> open-coded around the atomic_write_ept_entry() in ept_set_entry()
> means there are now places where the epte can change without
> maintaining the refcount invariants: ept_change_entry_type_page(), for
> example.

Correct, although at present I've added checks in the p2m paths to not allow
foreign types to come down to such calls.

> I would _much_ prefer to have atomic_write_ept_entry() DTRT -- it
> would have to know the difference between leaf and non-leaf entries,
> and return an error code.  I'd also be OK with having two
> atomic_write ops, one for leaf and one for non-leaf, with appropriate
> ASSERT()s on the contents.

Ok, how about something like shown further below?  (I think
it would be simpler to have one atomic_write op, instead of two)

One thing: on returning an error code, since at present there are no
paths allowing superpages for foreign types, it appears I'd not need
to worry about undoing ept_split_super_page(), so I added an assert
in there. Sound right?

Finally, I can leave other callers of atomic_write_ept_entry as is, or 
add ASSERTs for rc == 0. LMK.

thanks for patience,
mukesh


diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 1fa839a..41a8c40 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -36,8 +36,6 @@
 
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
-#define atomic_write_ept_entry(__pepte, __epte)                     \
-    write_atomic(&(__pepte)->epte, (__epte).epte)
 
 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
@@ -46,6 +44,43 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
     return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
 }
 
+/* returns : 0 for success, -errno otherwise */
+static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new)
+{
+    unsigned long oldmfn;
+    struct domain *fdom;
+    bool_t new_foreign = p2m_is_foreign(new.sa_p2mt);
+    bool_t old_foreign = p2m_is_foreign(entryptr->sa_p2mt);
+
+    if ( is_epte_superpage(entryptr) || likely(!new_foreign && !old_foreign) )
+    {
+        write_atomic(&entryptr->epte, new.epte);
+        return 0;
+    }
+
+    if ( new_foreign )
+    {
+        if ( !mfn_valid(new.mfn) )
+            return -EINVAL;
+
+        fdom = page_get_owner(mfn_to_page(new.mfn));
+        if ( fdom == NULL )
+            return -ESRCH;
+
+        /* get refcount on the page */
+        if ( !get_page(mfn_to_page(new.mfn), fdom) )
+            return -EBUSY;
+    }
+
+    oldmfn = entryptr->mfn;
+    write_atomic(&entryptr->epte, new.epte);
+
+    if ( old_foreign )
+        put_page(mfn_to_page(oldmfn));
+
+    return 0;
+}
+
 static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
 {
     /* First apply type permissions */
@@ -494,6 +529,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ept_entry_t split_ept_entry;
 
         ASSERT(is_epte_superpage(ept_entry));
+        ASSERT(!p2m_is_foreign(p2mt));
 
         split_ept_entry = atomic_read_ept_entry(ept_entry);
 
@@ -545,7 +581,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
     }
 
-    atomic_write_ept_entry(ept_entry, new_entry);
+    rc = atomic_write_ept_entry(ept_entry, new_entry);
 
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( p2mt != p2m_invalid &&

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-25  2:09                 ` Mukesh Rathor
@ 2014-04-25  6:49                   ` Jan Beulich
  2014-04-25 23:23                     ` Mukesh Rathor
  2014-04-26  0:06                     ` Mukesh Rathor
  2014-04-25  8:55                   ` Tim Deegan
  2014-04-26  1:34                   ` Mukesh Rathor
  2 siblings, 2 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-25  6:49 UTC (permalink / raw)
  To: Mukesh Rathor, Tim Deegan
  Cc: George.Dunlap, xen-devel, keir.xen, eddie.dong, jun.nakajima

>>> On 25.04.14 at 04:09, <mukesh.rathor@oracle.com> wrote:
> Ok, how about something like shown further below?  (I think
> it would be simpler to have one atomic_write op, instead of two)

That's an understandable desire, but ...

> +/* returns : 0 for success, -errno otherwise */
> +static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new)
> +{
> +    unsigned long oldmfn;
> +    struct domain *fdom;
> +    bool_t new_foreign = p2m_is_foreign(new.sa_p2mt);
> +    bool_t old_foreign = p2m_is_foreign(entryptr->sa_p2mt);

... these aren't really valid for intermediate entries (they just end up
getting p2m_ram_rw put into them right now, but that's in no way
explicit, and hence not set in stone). As said before, I can see the
need to use these fields eventually, so this would end up being a
latent problem. Hence minimally I'd want you to explicitly set the
field in ept_set_middle_entry() to make clear that we now depend on
it having a certain value.
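
I.e. something along these lines; the surrounding lines are paraphrased
from ept_set_middle_entry() as I remember it, so treat everything except
the final assignment as context, not as an exact quote:

    ept_entry->epte = 0;
    ept_entry->mfn = page_to_mfn(pg);
    ept_entry->access = p2m->default_access;

    ept_entry->r = ept_entry->w = ept_entry->x = 1;

    /* Intermediate entries so far get p2m_ram_rw (0) only implicitly,
     * via the zeroing above. Make it explicit, since
     * atomic_write_ept_entry() would now depend on it never being a
     * foreign type. */
    ept_entry->sa_p2mt = p2m_ram_rw;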

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-25  2:09                 ` Mukesh Rathor
  2014-04-25  6:49                   ` Jan Beulich
@ 2014-04-25  8:55                   ` Tim Deegan
  2014-04-25 23:29                     ` Mukesh Rathor
  2014-04-26  1:34                   ` Mukesh Rathor
  2 siblings, 1 reply; 58+ messages in thread
From: Tim Deegan @ 2014-04-25  8:55 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: Jan Beulich, George.Dunlap, eddie.dong, keir.xen, jun.nakajima,
	xen-devel

At 19:09 -0700 on 24 Apr (1398362965), Mukesh Rathor wrote:
> On Thu, 24 Apr 2014 11:46:41 +0200
> Tim Deegan <tim@xen.org> wrote:
> 
> > At 19:21 -0700 on 23 Apr (1398277311), Mukesh Rathor wrote:
> > > On Thu, 17 Apr 2014 14:58:42 +0100
> > > "Jan Beulich" <JBeulich@suse.com> wrote:
> > > 
> > > > >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > > > > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> > > > >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> > > > >> > On Wed, 16 Apr 2014 17:00:35 +0100
> > > > >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> ......
> > > > > That said, it should be easy enough only to refcount on leaf
> > > > > entries, right?  I can't see how that would be incompatible
> > > > > with the intermediate-node changes that Jan is working on.
> > > > 
> > > > Right - keeping the macro as is and introducing a derived
> > > > function to handle the extra requirements on leaf entries would
> > > > seem quite okay, so long as error propagation can be done
> > > > properly.
> > > 
> > > Ok, how about something like the following? In case of get_page
> > > failure, I'm not sure EINVAL is the best one to return; EBUSY?
> > 
> > This goes back to having refcounts open-coded.  Having the refcounts
> > open-coded around the atomic_write_ept_entry() in ept_set_entry()
> > means there are now places where the epte can change without
> > maintaining the refcount invariants: ept_change_entry_type_page(), for
> > example.
> 
> Correct, although at present I've added checks in the p2m paths to not
> allow foreign types to come down to such calls.
> 
> > I would _much_ prefer to have atomic_write_ept_entry() DTRT -- it
> > would have to know the difference between leaf and non-leaf entries,
> > and return an error code.  I'd also be OK with having two
> > atomic_write ops, one for leaf and one for non-leaf, with appropriate
> > ASSERT()s on the contents.
> 
> Ok, how about something like shown further below?  (I think
> it would be simpler to have one atomic_write op, instead of

I like this approach.  I think the test for foreignness needs to check
that it's dealing with a leaf node, e.g. by checking that bit .sp == 1
and setting that bit in all leaf entries.  At the moment we clear .sp in
4k entries but the docs say it's ignored by hardware so we could set
it, I think -- it would need an adjustment of ept_next_level() to
detect superpages by checking the level.
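
Very roughly, and only to show the shape of it (the exact level
arithmetic in ept_next_level() would need checking against the tree):

    /* in ept_set_entry(), when writing a 4k leaf (sketch): */
    new_entry.sp = 1;    /* now set on every leaf, not just superpages */

    /* and in ept_next_level(), a superpage is then a leaf entry seen
     * above the bottom level (the level test here is illustrative): */
    if ( e.sp && next_level > 1 )
        return GUEST_TABLE_SUPER_PAGE;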

> One thing: on returning an error code, since at present there are no
> paths allowing superpages for foreign types, it appears I'd not need
> to worry about undoing ept_split_super_page(), so I added an assert
> in there. Sound right?

Yep, an ASSERT to catch foreign superpage mappings is useful.

> Finally, I can leave other callers of atomic_write_ept_entry as is, or 
> add ASSERTs for rc == 0. LMK.

I would be inclined to ASSERT().

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-25  6:49                   ` Jan Beulich
@ 2014-04-25 23:23                     ` Mukesh Rathor
  2014-04-26  0:06                     ` Mukesh Rathor
  1 sibling, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-25 23:23 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Fri, 25 Apr 2014 07:49:31 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 25.04.14 at 04:09, <mukesh.rathor@oracle.com> wrote:
> > Ok, how about something like shown further below?  (I think
> > it would be simpler to have one atomic_write op, instead of
> > two)
> 
> That's an understandable desire, but ...
> 
> > +/* returns : 0 for success, -errno otherwise */
> > +static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new)
> > +{
> > +    unsigned long oldmfn;
> > +    struct domain *fdom;
> > +    bool_t new_foreign = p2m_is_foreign(new.sa_p2mt);
> > +    bool_t old_foreign = p2m_is_foreign(entryptr->sa_p2mt);
> 
> ... these aren't really valid for intermediate entries (they just end
> up getting p2m_ram_rw put into them right now, but that's in no way

Right, I see: by virtue of zeroing the entry, and p2m_ram_rw == 0.
Hmm... time for a new type, p2mt == p2m_intermed (just kidding :))...

> explicit, and hence not set in stone). As said before, I can see the
> need to use these fields eventually, so this would end up being a
> latent problem. Hence minimally I'd want you to explicitly set the
> field in ept_set_middle_entry() to make clear that we now depend on
> it having a certain value.

Correct, that would work for now, but it would make the bits unusable for
intermediate entries.... Hmmm... let me think, look more at the code,
and get back.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-25  8:55                   ` Tim Deegan
@ 2014-04-25 23:29                     ` Mukesh Rathor
  0 siblings, 0 replies; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-25 23:29 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Jan Beulich, George.Dunlap, eddie.dong, keir.xen, jun.nakajima,
	xen-devel

On Fri, 25 Apr 2014 10:55:19 +0200
Tim Deegan <tim@xen.org> wrote:

> At 19:09 -0700 on 24 Apr (1398362965), Mukesh Rathor wrote:
> > On Thu, 24 Apr 2014 11:46:41 +0200
> > Tim Deegan <tim@xen.org> wrote:
> > 
> > > At 19:21 -0700 on 23 Apr (1398277311), Mukesh Rathor wrote:
> > > > On Thu, 17 Apr 2014 14:58:42 +0100
> > > > "Jan Beulich" <JBeulich@suse.com> wrote:
> > > > 
> > > > > >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > > > > > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> > > > > >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> > > > > >> > On Wed, 16 Apr 2014 17:00:35 +0100
> > > > > >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > ......
> > > > > > That said, it should be easy enough only to refcount on leaf
> > > > > > entries, right?  I can't see how that would be incompatible
> > > > > > with the intermediate-node changes that Jan is working on.
> > > > > 
> > > > > Right - keeping the macro as is and introducing a derived
> > > > > function to handle the extra requirements on leaf entries
> > > > > would seem quite okay, so long as error propagation can be
> > > > > done properly.
> > > > 
> > > > Ok, how about something like the following? In case of get_page
> > > > failure, I'm not sure EINVAL is the best one to return; EBUSY?
> > > 
> > > This goes back to having refcounts open-coded.  Having the
> > > refcounts open-coded around the atomic_write_ept_entry() in
> > > ept_set_entry() means there are now places where the epte can
> > > change without maintaining the refcount invariants:
> > > ept_change_entry_type_page(), for example.
> > 
> > Correct, although at present I've added checks in the p2m paths to not
> > allow foreign types to come down to such calls.
> > 
> > > I would _much_ prefer to have atomic_write_ept_entry() DTRT -- it
> > > would have to know the difference between leaf and non-leaf
> > > entries, and return an error code.  I'd also be OK with having two
> > > atomic_write ops, one for leaf and one for non-leaf, with
> > > appropriate ASSERT()s on the contents.
> > 
> > Ok, how about something like shown further below?  (I think
> > it would be simpler to have one atomic_write op, instead of
> > two)
> 
> I like this approach.  I think the test for foreignness needs to check
> that it's dealing with a leaf node, e.g. by checking that bit .sp == 1

Right, right. My bad; somehow my brain got fixated on thinking that sp == 0
meant we were on a leaf node.

> and setting that bit in all leaf entries.  At the moment we clear .sp
> in 4k entries but the docs say it's ignored by hardware so we could
> set it, I think -- it would need an adjustment of ept_next_level() to
> detect superpages by checking the level.

Right, that's an option. The bit is ignored for L1 entries. But it would
be super confusing to anyone looking at it, me first. By convention it's
been the superpage bit.... Let me see if I can come up with alternate
solutions/proposals then, now that I (finally) understand what's going
on :).

thanks
Mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-25  6:49                   ` Jan Beulich
  2014-04-25 23:23                     ` Mukesh Rathor
@ 2014-04-26  0:06                     ` Mukesh Rathor
  2014-04-28  7:23                       ` Jan Beulich
  1 sibling, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-26  0:06 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Fri, 25 Apr 2014 07:49:31 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 25.04.14 at 04:09, <mukesh.rathor@oracle.com> wrote:
> > Ok, how about something like shown further below?  (I think
> > it would be simpler to have one atomic_write op, instead of
> > two)
> 
> That's an understandable desire, but ...
> 
> > +/* returns : 0 for success, -errno otherwise */
> > +static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new)
> > +{
> > +    unsigned long oldmfn;
> > +    struct domain *fdom;
> > +    bool_t new_foreign = p2m_is_foreign(new.sa_p2mt);
> > +    bool_t old_foreign = p2m_is_foreign(entryptr->sa_p2mt);
> 
> ... these aren't really valid for intermediate entries (they just end
> up getting p2m_ram_rw put into them right now, but that's in no way
> explicit, and hence not set in stone). As said before, I can see the
> need to use these fields eventually, so this would end up being a
> latent problem. Hence minimally I'd want you to explicitly set the
> field in ept_set_middle_entry() to make clear that we now depend on
> it having a certain value.

BTW, one thing that is confusing when looking at the code:

ept_memory_type_changed() calls ept_invalidate_emt() with the PML4 mfn.
ept_invalidate_emt() walks the L4 entries checking the emt and doing
atomic_write_ept_entry() on L4 entries. However, looking at the latest
Intel SDM figure 28-1, vol 3B, it appears those bits are reserved?
EPT MT is only valid for L3, L2 and L1. What am I missing?

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-25  2:09                 ` Mukesh Rathor
  2014-04-25  6:49                   ` Jan Beulich
  2014-04-25  8:55                   ` Tim Deegan
@ 2014-04-26  1:34                   ` Mukesh Rathor
  2014-04-28  8:54                     ` Jan Beulich
  2 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-04-26  1:34 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: Jan Beulich, George.Dunlap, Tim Deegan, eddie.dong, keir.xen,
	jun.nakajima, xen-devel

On Thu, 24 Apr 2014 19:09:25 -0700
Mukesh Rathor <mukesh.rathor@oracle.com> wrote:

> On Thu, 24 Apr 2014 11:46:41 +0200
> Tim Deegan <tim@xen.org> wrote:
> 
> > At 19:21 -0700 on 23 Apr (1398277311), Mukesh Rathor wrote:
> > > On Thu, 17 Apr 2014 14:58:42 +0100
> > > "Jan Beulich" <JBeulich@suse.com> wrote:
> > > 
> > > > >>> On 17.04.14 at 14:36, <tim@xen.org> wrote:
> > > > > At 07:50 +0100 on 17 Apr (1397717440), Jan Beulich wrote:
> > > > >> >>> On 17.04.14 at 03:37, <mukesh.rathor@oracle.com> wrote:
> > > > >> > On Wed, 16 Apr 2014 17:00:35 +0100
> > > > >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > 
> > This goes back to having refcounts open-coded.  Having the refcounts
> > open-coded around the atomic_write_ept_entry() in ept_set_entry()
> > means there are now places where the epte can change without
> > maintaining the refcount invariants: ept_change_entry_type_page(),
> > for example.
> 
> Correct, although at present I've added checks in the p2m paths to not
> allow foreign types to come down to such calls.
> 
> > I would _much_ prefer to have atomic_write_ept_entry() DTRT -- it
> > would have to know the difference between leaf and non-leaf entries,
> > and return an error code.  I'd also be OK with having two
> > atomic_write ops, one for leaf and one for non-leaf, with
> > appropriate ASSERT()s on the contents.
> 
> Ok, how about something like shown further below?  (I think
> it would be simpler to have one atomic_write op, instead of two)
> 
> One thing: on returning an error code, since at present there are no
> paths allowing superpages for foreign types, it appears I'd not need
> to worry about undoing ept_split_super_page(), so I added an assert
> in there. Sound right?
> 
> Finally, I can leave other callers of atomic_write_ept_entry as is,
> or add ASSERTs for rc == 0. LMK.
> 
> thanks for patience,
> mukesh


Tim/Jan,

Ok, so far:

option 1, Jan's suggestion:
  - Use my latest patch, but add an explicit set of the p2m type for
    intermediate entries in ept_set_middle_entry(), like Jan said.

    Good: we won't then need to worry about checking for leaf entries.
    Bad: future code can't use the bits for intermediate entries.

option 2, Tim's suggestion:
  - Use my latest patch, but set sp = 1 in leaf entries. Then use the level
    to figure out if we are on a superpage. This will work too, but it will
    change the definition of the sp bit, and the superpage check will always
    require checking the level.

option 3:
  - Pass the level to atomic_write_ept_entry() and check for foreign only in
    case of level == 0, i.e. leaf entries, where foreign can be set.

    It appears that information is available at all call sites, and this
    would be the least intrusive in terms of using bits for non-leaf entries.

option 4:
  - Each call site checks the level and calls either a macro for leaf entries
    or one for non-leaf entries. Same as 3, but the check is at the call site.


I'll think of more options over the weekend, but I think I like option 3
the best (a rough sketch follows below). LMK, and I will work on it.
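
For concreteness, option 3 might look roughly like this. This is only a
sketch built from the earlier patch in this thread, not a final submission:
the level plumbing at the call sites is omitted, and the ASSERT placement
is illustrative:

static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
                                  int level)
{
    unsigned long oldmfn = INVALID_MFN;
    bool_t new_foreign = !level && p2m_is_foreign(new.sa_p2mt);
    bool_t old_foreign = !level && p2m_is_foreign(entryptr->sa_p2mt);

    /* foreign mappings are 4k only, so nothing to do for non-leaf writes */
    if ( level )
        ASSERT(!p2m_is_foreign(new.sa_p2mt));

    if ( new_foreign )
    {
        struct domain *fdom;

        if ( !mfn_valid(new.mfn) )
            return -EINVAL;

        fdom = page_get_owner(mfn_to_page(new.mfn));
        if ( fdom == NULL )
            return -ESRCH;

        /* take a reference for as long as the mapping exists */
        if ( !get_page(mfn_to_page(new.mfn), fdom) )
            return -EBUSY;
    }

    if ( old_foreign )
        oldmfn = entryptr->mfn;

    write_atomic(&entryptr->epte, new.epte);

    if ( oldmfn != INVALID_MFN )
        put_page(mfn_to_page(oldmfn));

    return 0;
}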

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-26  0:06                     ` Mukesh Rathor
@ 2014-04-28  7:23                       ` Jan Beulich
  0 siblings, 0 replies; 58+ messages in thread
From: Jan Beulich @ 2014-04-28  7:23 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 26.04.14 at 02:06, <mukesh.rathor@oracle.com> wrote:
> BTW, one thing that is confusing when looking at the code:
> 
> ept_memory_type_changed() calls ept_invalidate_emt() with the PML4 mfn.
> ept_invalidate_emt() walks the L4 entries checking the emt and doing
> atomic_write_ept_entry() on L4 entries. However, looking at the latest
> Intel SDM figure 28-1, vol 3B, it appears those bits are reserved?
> EPT MT is only valid for L3, L2 and L1. What am I missing?

That's the precise intention: We _want_ an EPT_MISCONFIG VM exit,
and hence we write a value here that we know is reserved.
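
From memory, the relevant part of ept_invalidate_emt() goes along these
lines (paraphrased, not an exact quote of the source):

static void ept_invalidate_emt(mfn_t mfn)
{
    ept_entry_t *epte = map_domain_page(mfn_x(mfn));
    unsigned int i;

    for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
    {
        ept_entry_t e = atomic_read_ept_entry(&epte[i]);

        if ( !is_epte_valid(&e) || !is_epte_present(&e) )
            continue;

        /* MTRR_NUM_TYPES is not a valid memory type, so the next access
         * through this entry faults with EPT_MISCONFIG, at which point
         * the correct EMT gets recomputed lazily. */
        e.emt = MTRR_NUM_TYPES;
        atomic_write_ept_entry(&epte[i], e);
    }

    unmap_domain_page(epte);
}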

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-26  1:34                   ` Mukesh Rathor
@ 2014-04-28  8:54                     ` Jan Beulich
  2014-04-28  9:09                       ` Tim Deegan
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-04-28  8:54 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, Tim Deegan, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 26.04.14 at 03:34, <mukesh.rathor@oracle.com> wrote:
> Ok, so far:
> 
> option 1, Jan's suggestion:
>   - Use my latest patch, but add an explicit set of the p2m type for
>     intermediate entries in ept_set_middle_entry(), like Jan said.
> 
>     Good: we won't then need to worry about checking for leaf entries.
>     Bad: future code can't use the bits for intermediate entries.
> 
> option 2, Tim's suggestion:
>   - Use my latest patch, but set sp = 1 in leaf entries. Then use the level
>     to figure out if we are on a superpage. This will work too, but it will
>     change the definition of the sp bit, and the superpage check will always
>     require checking the level.
> 
> option 3:
>   - Pass the level to atomic_write_ept_entry() and check for foreign only in
>     case of level == 0, i.e. leaf entries, where foreign can be set.
> 
>     It appears that information is available at all call sites, and this
>     would be the least intrusive in terms of using bits for non-leaf entries.
> 
> option 4:
>   - Each call site checks the level and calls either a macro for leaf entries
>     or one for non-leaf entries. Same as 3, but the check is at the call site.

I think my preference would go backwards from 4 to 1, but I think
Tim would - like you - prefer 3 over 4, which I wouldn't object to.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages
  2014-04-28  8:54                     ` Jan Beulich
@ 2014-04-28  9:09                       ` Tim Deegan
  0 siblings, 0 replies; 58+ messages in thread
From: Tim Deegan @ 2014-04-28  9:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George.Dunlap, eddie.dong, keir.xen, jun.nakajima, xen-devel

At 09:54 +0100 on 28 Apr (1398675248), Jan Beulich wrote:
> >>> On 26.04.14 at 03:34, <mukesh.rathor@oracle.com> wrote:
> > Ok, so far:
> > 
> > option 1, Jan's suggestion:
> >   - Use my latest patch, but add an explicit set of the p2m type for
> >     intermediate entries in ept_set_middle_entry(), like Jan said.
> > 
> >     Good: we won't then need to worry about checking for leaf entries.
> >     Bad: future code can't use the bits for intermediate entries.
> > 
> > option 2, Tim's suggestion:
> >   - Use my latest patch, but set sp = 1 in leaf entries. Then use the level
> >     to figure out if we are on a superpage. This will work too, but it will
> >     change the definition of the sp bit, and the superpage check will always
> >     require checking the level.
> > 
> > option 3:
> >   - Pass the level to atomic_write_ept_entry() and check for foreign only in
> >     case of level == 0, i.e. leaf entries, where foreign can be set.
> > 
> >     It appears that information is available at all call sites, and this
> >     would be the least intrusive in terms of using bits for non-leaf entries.
> > 
> > option 4:
> >   - Each call site checks the level and calls either a macro for leaf entries
> >     or one for non-leaf entries. Same as 3, but the check is at the call site.
> 
> I think my preference would go backwards from 4 to 1, but I think
> Tim would - like you - prefer 3 over 4, which I wouldn't object to.

My order would be 2, 3, 4, 1, but I'm really only objecting to #1; so that
sounds like consensus for option 3.

Tim.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-04-24  6:49                   ` Jan Beulich
  2014-04-24 23:28                     ` Mukesh Rathor
@ 2014-05-06  0:19                     ` Mukesh Rathor
  2014-05-06  7:44                       ` Jan Beulich
  1 sibling, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-05-06  0:19 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Thu, 24 Apr 2014 07:49:44 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 23.04.14 at 23:18, <mukesh.rathor@oracle.com> wrote:
> > On Wed, 23 Apr 2014 10:07:25 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> > 
> >> >>> On 23.04.14 at 02:11, <mukesh.rathor@oracle.com> wrote:
> >> > On Tue, 22 Apr 2014 08:33:29 +0100
> >> > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> >> >>> On 22.04.14 at 02:59, <mukesh.rathor@oracle.com> wrote:
> > ......
.....
> >> >   So it must have been the third one that I had observed the
> >> >   vioapic_range crash in a while ago, and had made note of it.
> >> > Looking at it:
> >> > 
> >> >     if ( (p2mt == p2m_mmio_dm) ||
> >> >          (access_w && (p2mt == p2m_ram_ro)) )
> >> >     {
> >> >         put_gfn(p2m->domain, gfn);
> >> >         if ( !handle_mmio() )
> >> > 
> >> > doesn't seem to apply to domU. Unfortunately, I can't reproduce it
> >> > now so maybe it was an ept violation due to some bug, and a
> >> > crash in vioapic_range before printing the gfn/mfns etc by
> >> > ept_handle_violation made me make a note to put a check in it.
> >> 
> >> Which makes me think that we don't need the patch at all.
> > 
> > Well, without this patch, in case of a dom0 EPT violation, dom0 will
> > not die gracefully printing gfn/mfn/etc. info. Instead it will
> > show a fault in vioapic_range. 
> > 
> > 
> > ept_handle_violation() 
> >        hvm_hap_nested_page_fault()
> >              -> handle_mmio() -----> vioapic_range() : KABOOM!!
> > 
> >        gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
> >                     "gpa %#"PRIpaddr", mfn %#lx, type %i.\n",
> >                                  qualification,  <=== NOT REACHED
> >           .......

... 

> So we're moving in circles I'm afraid: You told us that I/O emulation
> is being handled by a separate path from HVM's, which means either
> handle_mmio() separates the cases itself, or doesn't even get
> called, only to then again show us the call sequence above. One
> of the two statements can't be correct, and what I'd like you to do
> about it depends on which one it is.

Ok, nailed it!! The issue is the guest causing an EPT violation because
of a bad pfn in its pte, not mmio emulation.

ept_handle_violation calls hvm_hap_nested_page_fault() which has
three calls to handle_mmio(). The first two are skipped for pvh,
but before the 3rd call, the function does:

    mfn = get_gfn_type_access(p2m, gfn, &p2mt, &p2ma,
                              P2M_ALLOC | (access_w ? P2M_UNSHARE : 0), NULL);

Since the pfn is invalid, this returns p2m_mmio_dm, which is the
default return for non-ram for, I think, historical reasons:

    ept_get_entry():
          *t = p2m_mmio_dm;

This causes the 3rd handle_mmio() in hvm_hap_nested_page_fault to be called for pvh:

    if ( (p2mt == p2m_mmio_dm) ||
         (access_w && (p2mt == p2m_ram_ro)) )
    {
        put_gfn(p2m->domain, gfn);
        if ( !handle_mmio() )  <==========
            hvm_inject_hw_exception(TRAP_gp_fault, 0);

which would result in vioapic_range panic in xen.
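
For reference, the panic is a NULL dereference. For pvh no vioapic is
ever set up, so in vioapic_range() -- paraphrased below; the exact type
and macro names may differ from the tree -- the very first field access
faults:

static int vioapic_range(struct vcpu *v, unsigned long addr)
{
    struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);

    /* vioapic is NULL for a pvh domain, so this dereference faults */
    return ((addr >= vioapic->base_address) &&
            (addr < vioapic->base_address + VIOAPIC_MEM_LENGTH));
}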

I was on the right track before; I just couldn't figure out where
p2m_mmio_dm was coming from. Anyway, so:


 1. We can go with the patch that adds pvh_mmio_handlers in anticipation
    of future MSI-X, HPET support:

+static const struct hvm_mmio_handler *const
+pvh_mmio_handlers[HVM_MMIO_HANDLER_NR] =
...

    That would naturally return X86EMUL_UNHANDLEABLE in this case. 

    OUTCOME: guest would be injected with GPF, but xen won't crash it.


 2. We could go with vioapic null ptr check in vioapic_range() itself,
    thus causing handle_mmio to return X86EMUL_UNHANDLEABLE also.

    OUTCOME: guest would be injected with GPF, but xen won't crash it.


 3. Add pvh check above in hvm_hap_nested_page_fault:

        put_gfn(p2m->domain, gfn);
        if ( is_pvh_vcpu(v) )
        {
            rc = 0;
            goto out;
        }
        if ( !handle_mmio() )  <==========
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
...

    OUTCOME: xen would crash guest with the nice ept violation message.


 4. Add check for pvh in handle_mmio:

 int handle_mmio(void)
 {
      int rc;

      if ( is_pvh_vcpu(current) )
          return X86EMUL_UNHANDLEABLE;
     ... 

    OUTCOME: guest would be injected with GPF, but xen won't crash it.


 5. Do 1/2/4 above, but in addition, in hvm_hap_nested_page_fault() do:

    if ( (p2mt == p2m_mmio_dm) ||
         (access_w && (p2mt == p2m_ram_ro)) )
    {
        put_gfn(p2m->domain, gfn);
        rc = 1;
        if ( !handle_mmio() )  <==========
        {
            if ( is_pvh_vcpu(v) )
                rc = 0;
            else
                hvm_inject_hw_exception(TRAP_gp_fault, 0);
        }
        goto out;
    }

    OUTCOME: xen would crash guest with the nice ept violation message
             that prints all the details.

Please lmk your thoughts and preference, and I'll submit a patch. The patch
would not be part of the dom0 pvh series, as a pvh domU ept violation
could cause this xen panic in vioapic_range too. We may want to backport
it too (not sure, given the experimental nature of the feature).

thanks,
Mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-05-06  0:19                     ` Mukesh Rathor
@ 2014-05-06  7:44                       ` Jan Beulich
  2014-05-07  1:07                         ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-05-06  7:44 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 06.05.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
>  1. We can go with the patch that adds pvh_mmio_handlers in anticipation 
>     of future MSI-X, HPET support:
> 
> +static const struct hvm_mmio_handler *const
> +pvh_mmio_handlers[HVM_MMIO_HANDLER_NR] =
> ...
> 
>     That would naturally return X86EMUL_UNHANDLEABLE in this case. 
> 
>     OUTCOME: guest would be injected with GPF, but xen won't crash it.

That's a pretty odd result from a nested paging fault, but perhaps
acceptable/reasonable for PVH. Of course such should never happen
for HVM. And I wonder whether a virtualization exception (vector 20)
wouldn't be more natural here (delivered directly via hardware if
support for this is available and there isn't anything preventing the
delivery).

>  2. We could go with vioapic null ptr check in vioapic_range() itself,
>     thus causing handle_mmio to return X86EMUL_UNHANDLEABLE also.
> 
>     OUTCOME: guest would be injected with GPF, but xen won't crash it.
> 
> 
>  3. Add pvh check above in hvm_hap_nested_page_fault:
> 
>         put_gfn(p2m->domain, gfn);
>         if ( is_pvh_vcpu(v) )
>         {
>             rc = 0;
>             goto out;
>         }
>         if ( !handle_mmio() )  <==========
>             hvm_inject_hw_exception(TRAP_gp_fault, 0);
> ...
> 
>     OUTCOME: xen would crash guest with the nice ept violation message.
> 
> 
>  4. Add check for pvh in handle_mmio:
> 
>  int handle_mmio(void)
>  {
>       int rc;
> 
>       if ( is_pvh_vcpu(current) )
>           return X86EMUL_UNHANDLEABLE;
>      ... 
> 
>     OUTCOME: guest would be injected with GPF, but xen won't crash it.
> 
> 
>  5. Do 1/2/4 above, but in addition, in hvm_hap_nested_page_fault() do:
> 
>     if ( (p2mt == p2m_mmio_dm) ||
>          (access_w && (p2mt == p2m_ram_ro)) )
>     {
>         put_gfn(p2m->domain, gfn);
>         rc = 1;
>         if ( !handle_mmio() )  <==========
>         {
>             if ( is_pvh_vcpu(v) )
>                 rc = 0;
>             else
>                 hvm_inject_hw_exception(TRAP_gp_fault, 0);
>         }
>         goto out;
>     }
> 
>     OUTCOME: xen would crash guest with the nice ept violation message
>              that prints all the details.
> 
> Please lmk your thoughts and preference, and I'll submit a patch. The patch
> would not be part of the dom0 pvh series, as a pvh domU ept violation
> could cause this xen panic in vioapic_range too. We may want to backport
> it too (not sure, given the experimental nature of the feature).

So my first preference would be for #VE to be delivered to the guest. If
such can't be delivered, I guess killing the guest is more natural than
injecting #GP - we may want/need to make support for #VE a
requirement for PVH guests.

Alternatively, whether handle_mmio() itself filters out PVH, or whether
its callers take care to not call it in that case largely depends on what
would produce cleaner code - i.e. if all but one of the code paths
already avoid calling the function, I would think the remaining one
should do so too unless moving the filter into the function would allow
cleaning up the other call sites.

That not withstanding I think we will want/need pvh_mmio_handlers[]
at some point anyway - if that's going to be multiplexed inside
handle_mmio(), then preventing the function to be called would be the
wrong thing now (as that would then later need to be undone, and
hence would better get done right from the beginning).

What I'm definitely against - as said before - is option 2.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-05-06  7:44                       ` Jan Beulich
@ 2014-05-07  1:07                         ` Mukesh Rathor
  2014-05-07  6:47                           ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-05-07  1:07 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Tue, 06 May 2014 08:44:25 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 06.05.14 at 02:19, <mukesh.rathor@oracle.com> wrote:
> >  1. We can go with the patch that adds pvh_mmio_handlers in
> > anticipation of future MSI-X, HPET support:
> > 
> > +static const struct hvm_mmio_handler *const
> > +pvh_mmio_handlers[HVM_MMIO_HANDLER_NR] =
> > ...
> > 
> >     That would naturally return X86EMUL_UNHANDLEABLE in this case. 
> > 
> >     OUTCOME: guest would be injected with GPF, but xen won't crash
> > it.
> 
> That's a pretty odd result from a nested paging fault, but perhaps
> acceptable/reasonable for PVH. Of course such should never happen
> for HVM. And I wonder whether a virtualization exception (vector 20)
> wouldn't be more natural here (delivered directly via hardware if
> support for this is available and there isn't anything preventing the
> delivery).
> 
> >  2. We could go with vioapic null ptr check in vioapic_range()
> > itself, thus causing handle_mmio to return X86EMUL_UNHANDLEABLE
> > also.
> > 
> >     OUTCOME: guest would be injected with GPF, but xen won't crash
> > it.
> > 
> > 
> >  3. Add pvh check above in hvm_hap_nested_page_fault:
> > 
> >         put_gfn(p2m->domain, gfn);
> >         if ( is_pvh_vcpu(v) )
> >         {
> >             rc = 0;
> >             goto out;
> >         }
> >         if ( !handle_mmio() )  <==========
> >             hvm_inject_hw_exception(TRAP_gp_fault, 0);
> > ...
> > 
> >     OUTCOME: xen would crash guest with the nice ept violation
> > message.
> > 
> > 
> >  4. Add check for pvh in handle_mmio:
> > 
> >  int handle_mmio(void)
> >  {
> >       int rc;
> > 
> >       if ( is_pvh_vcpu(current) )
> >           return X86EMUL_UNHANDLEABLE;
> >      ... 
> > 
> >     OUTCOME: guest would be injected with GPF, but xen won't crash
> > it.
> > 
> > 
> >  5. Do 1/2/4 above, but in addition, in hvm_hap_nested_page_fault()
> > do:
> > 
> >     if ( (p2mt == p2m_mmio_dm) ||
> >          (access_w && (p2mt == p2m_ram_ro)) )
> >     {
> >         put_gfn(p2m->domain, gfn);
> >         rc = 1;
> >         if ( !handle_mmio() )  <==========
> >         {
> >             if ( is_pvh_vcpu(v) )
> >                 rc = 0;
> >             else
> >                 hvm_inject_hw_exception(TRAP_gp_fault, 0);
> >         }
> >         goto out;
> >     }
> > 
> >     OUTCOME: xen would crash guest with the nice ept violation
> > message that prints all the details.
> > 
> > Please lmk your thoughts and preference, and I'll submit a patch. The
> > patch would not be part of the dom0 pvh series, as a pvh domU ept
> > violation could cause this xen panic in vioapic_range too. We may
> > want to backport it too (not sure, given the experimental nature of
> > the feature).
> 
> So my first preference would be for #VE to be delivered to the guest. If
> such can't be delivered, I guess killing the guest is more natural
> than injecting #GP - we may want/need to make support for #VE a
> requirement for PVH guests.

#VE support doesn't appear to be there in the guest right now. So, let
me not get too distracted; we have other higher priority fixmes, not to
mention other support like AMD, migration, 32bit etc. that I'd like
to address right after this series goes in.

> Alternatively, whether handle_mmio() itself filters out PVH, or
> whether its callers take care to not call it in that case largely
> depends on what would produce cleaner code - i.e. if all but one of
> the code paths already avoid calling the function, I would think the
> remaining one should do so too unless moving the filter into the
> function would allow cleaning up the other call sites.
> 
> That not withstanding I think we will want/need pvh_mmio_handlers[]
> at some point anyway - if that's going to be multiplexed inside
> handle_mmio(), then preventing the function to be called would be the
> wrong thing now (as that would then later need to be undone, and
> hence would better get done right from the beginning).
> 
> What I'm definitely against - as said before - is option 2.

Ok, so it looks like you are ok with #3. We can add pvh_mmio_handlers[] when
adding the MSI-X support so it gets done properly, as it needs some work
on the guest side too, from what I recall. #3 would be easy to backport also.

thanks
Mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-05-07  1:07                         ` Mukesh Rathor
@ 2014-05-07  6:47                           ` Jan Beulich
  2014-05-07 23:52                             ` Mukesh Rathor
  0 siblings, 1 reply; 58+ messages in thread
From: Jan Beulich @ 2014-05-07  6:47 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 07.05.14 at 03:07, <mukesh.rathor@oracle.com> wrote:
> On Tue, 06 May 2014 08:44:25 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
>> So my first preference would be for #VE to be delivered to the guest. If
>> such can't be delivered, I guess killing the guest is more natural
>> than injecting #GP - we may want/need to make support for #VE a
>> requirement for PVH guests.
> 
> #VE support doesn't appear to be there in the guest right now. So, let
> me not get too distracted; we have other higher priority fixmes, not to
> mention other support like AMD, migration, 32bit etc. that I'd like
> to address right after this series goes in.

Seems like we're once again having some disagreement in terms of
how to approach problems: If a proper solution requires more
extensive work, then to me this is not a reason to discard it without
any further evaluation. You, otoh, seem to prefer adding more
workarounds and fixmes. If you were to tell me that the solution
isn't suitable for technical reasons, that would be a different thing.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-05-07  6:47                           ` Jan Beulich
@ 2014-05-07 23:52                             ` Mukesh Rathor
  2014-05-08  6:33                               ` Jan Beulich
  0 siblings, 1 reply; 58+ messages in thread
From: Mukesh Rathor @ 2014-05-07 23:52 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Wed, 07 May 2014 07:47:31 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 07.05.14 at 03:07, <mukesh.rathor@oracle.com> wrote:
> > On Tue, 06 May 2014 08:44:25 +0100
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> So my first preference would be for #VE to be delivered to the guest.
> >> If such can't be delivered, I guess killing the guest is more
> >> natural than injecting #GP - we may want/need to make support for
> >> #VE a requirement for PVH guests.
> > 
> > #VE support doesn't appear to be there in the guest right now. So, let
> > me not get too distracted; we have other higher priority fixmes, not
> > to mention other support like AMD, migration, 32bit etc. that I'd like
> > to address right after this series goes in.
> 
> Seems like we're once again having some disagreement in terms of
> how to approach problems: If a proper solution requires more
> extensive work, then to me this is not a reason to discard it without
> any further evaluation. You, otoh, seem to prefer adding more
> workarounds and fixmes. If you were to tell me that the solution
> isn't suitable for technical reasons, that would be a different thing.

What I am trying to say is that I don't see #VE support in Linux right
now. Upstreaming that, in my experience, could take anywhere from 2 months
to a year depending on the maintainers! Meanwhile, xen will crash if a pvh
guest causes an ept violation.

thanks
mukesh

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range
  2014-05-07 23:52                             ` Mukesh Rathor
@ 2014-05-08  6:33                               ` Jan Beulich
  0 siblings, 0 replies; 58+ messages in thread
From: Jan Beulich @ 2014-05-08  6:33 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 08.05.14 at 01:52, <mukesh.rathor@oracle.com> wrote:
> What I am trying to say is that I don't see #VE support in Linux right
> now. Upstreaming that, in my experience, could take anywhere from 2 months
> to a year depending on the maintainers! Meanwhile, xen will crash if a pvh
> guest causes an ept violation.

That last aspect needs addressing, no question. But PVH is
experimental anyway, so fixing it by breaking upstream Linux is
_not_ a problem (and remember that I had always pointed out that
Linux support should be upstreamed only once the hypervisor
interface is completely understood, defined, and sufficiently
implemented, or the functionality should be called/marked
experimental [and hence subject to incompatible changes] there
too). I.e. injecting #VE into guests can very well be the solution to
the problem - for one, guests can prevent these exceptions from being
raised by not touching non-existent memory, and second it's clearly
better for guests than for the host to crash.

Jan

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread

Thread overview: 58+ messages
2014-04-16  0:12 [V9 PATCH 0/8] pvh dom0 patches Mukesh Rathor
2014-04-16  0:12 ` [V9 PATCH 1/8] pvh dom0: move some pv specific code to static functions Mukesh Rathor
2014-04-16  0:12 ` [V9 PATCH 2/8] pvh dom0: construct_dom0 changes Mukesh Rathor
2014-04-16  0:12 ` [V9 PATCH 3/8] pvh dom0: Introduce p2m_map_foreign Mukesh Rathor
2014-04-16  0:12 ` [V9 PATCH 4/8] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
2014-04-16 15:28   ` Jan Beulich
2014-04-16  0:12 ` [V9 PATCH 5/8] pvh dom0: make xsm_map_gmfn_foreign available for x86 Mukesh Rathor
2014-04-16 14:29   ` Daniel De Graaf
2014-04-16  0:12 ` [V9 PATCH 6/8] pvh dom0: Add and remove foreign pages Mukesh Rathor
2014-04-16 16:00   ` Jan Beulich
2014-04-17  1:37     ` Mukesh Rathor
2014-04-17  6:50       ` Jan Beulich
2014-04-17 12:36         ` Tim Deegan
2014-04-17 13:58           ` Jan Beulich
2014-04-19  0:59             ` Mukesh Rathor
2014-04-21 16:10               ` Jan Beulich
2014-04-24  2:21             ` Mukesh Rathor
2014-04-24  6:44               ` Jan Beulich
2014-04-24  9:46               ` Tim Deegan
2014-04-25  2:09                 ` Mukesh Rathor
2014-04-25  6:49                   ` Jan Beulich
2014-04-25 23:23                     ` Mukesh Rathor
2014-04-26  0:06                     ` Mukesh Rathor
2014-04-28  7:23                       ` Jan Beulich
2014-04-25  8:55                   ` Tim Deegan
2014-04-25 23:29                     ` Mukesh Rathor
2014-04-26  1:34                   ` Mukesh Rathor
2014-04-28  8:54                     ` Jan Beulich
2014-04-28  9:09                       ` Tim Deegan
2014-04-22  0:19       ` Mukesh Rathor
2014-04-22  7:28         ` Jan Beulich
2014-04-23  0:28           ` Mukesh Rathor
2014-04-23  9:03             ` Jan Beulich
2014-04-23 16:13               ` Andres Lagar-Cavilla
2014-04-24 16:37                 ` Tim Deegan
2014-04-16  0:12 ` [V9 PATCH 7/8] pvh dom0: check for vioapic null ptr in vioapic_range Mukesh Rathor
2014-04-16 16:05   ` Jan Beulich
2014-04-17  1:44     ` Mukesh Rathor
2014-04-17  6:54       ` Jan Beulich
2014-04-22  0:59         ` Mukesh Rathor
2014-04-22  7:33           ` Jan Beulich
2014-04-23  0:11             ` Mukesh Rathor
2014-04-23  9:07               ` Jan Beulich
2014-04-23 21:18                 ` Mukesh Rathor
2014-04-24  6:49                   ` Jan Beulich
2014-04-24 23:28                     ` Mukesh Rathor
2014-05-06  0:19                     ` Mukesh Rathor
2014-05-06  7:44                       ` Jan Beulich
2014-05-07  1:07                         ` Mukesh Rathor
2014-05-07  6:47                           ` Jan Beulich
2014-05-07 23:52                             ` Mukesh Rathor
2014-05-08  6:33                               ` Jan Beulich
2014-04-16  0:12 ` [V9 PATCH 8/8] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
2014-04-16 12:57   ` Konrad Rzeszutek Wilk
2014-04-16 13:01   ` Andrew Cooper
2014-04-16 16:09   ` Jan Beulich
2014-04-16 14:57 ` [V9 PATCH 0/8] pvh dom0 patches Roger Pau Monné
2014-04-16 21:15   ` Mukesh Rathor
