From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: Xen-devel@lists.xensource.com
Cc: george.dunlap@eu.citrix.com, keir.xen@gmail.com, tim@xen.org,
	JBeulich@suse.com
Subject: [V6 PATCH 2/7] pvh dom0: construct_dom0 changes
Date: Thu,  5 Dec 2013 18:38:39 -0800
Message-ID: <1386297524-15483-3-git-send-email-mukesh.rathor@oracle.com>
In-Reply-To: <1386297524-15483-1-git-send-email-mukesh.rathor@oracle.com>

This patch changes construct_dom0 to boot dom0 in PVH mode. The changes
needed to support this are also included here.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/domain_build.c |  235 +++++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/hap/hap.c   |   15 +++
 xen/include/asm-x86/hap.h   |    1 +
 3 files changed, 234 insertions(+), 17 deletions(-)
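
Note for reviewers (not part of the patch): below is a minimal,
self-contained sketch of the e820 walk performed by pvh_map_all_iomem()
in this patch. It uses a hypothetical three-entry e820 map to show which
pfn ranges end up identity-mapped into the EPT/NPT; the types, macros and
values here are stand-ins for illustration only, not Xen code.

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PFN_DOWN(x) ((uint64_t)(x) >> PAGE_SHIFT)
    #define PFN_UP(x)   (((uint64_t)(x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)

    enum { E820_RAM = 1, E820_RESERVED = 2, E820_UNUSABLE = 5 };

    struct e820entry { uint64_t addr, size; int type; };

    /* Hypothetical map: low RAM, a reserved MMIO hole, then more RAM. */
    static const struct e820entry map[] = {
        { 0x00000000, 0x0009f000, E820_RAM      },
        { 0x000f0000, 0x00010000, E820_RESERVED },
        { 0x00100000, 0x07f00000, E820_RAM      },
    };

    int main(void)
    {
        uint64_t start = 0, end = 0, s, e;
        unsigned int i, n = sizeof(map) / sizeof(map[0]);

        for ( i = 0; i < n; i++ )
        {
            end = map[i].addr + map[i].size;

            /* A mapping run ends at a RAM/UNUSABLE entry (map the gap in
             * front of it) or at the final entry (map through its end). */
            if ( map[i].type == E820_RAM || map[i].type == E820_UNUSABLE ||
                 i == n - 1 )
            {
                s = PFN_DOWN(start);
                e = (map[i].type == E820_RAM || map[i].type == E820_UNUSABLE)
                    ? PFN_UP(map[i].addr) : PFN_UP(end);
                if ( s < e )
                    printf("identity-map pfns [%#llx, %#llx)\n",
                           (unsigned long long)s, (unsigned long long)e);
                start = end;
            }
        }
        return 0;
    }

With this map the sketch prints a single run, [0x9f, 0x100): the hole
between the end of low RAM and the start of the next RAM entry, with the
reserved entry in between swallowed into the same run. Similarly, as a
worked example (again with made-up numbers) of the sizing formula used by
hap_set_pvh_alloc_for_dom0() below, a dom0 with 4GB of RAM and 4 vcpus
would get 4 * (256 * 4 + 2 * 4096) = 36864 KiB, i.e. roughly 36 MiB of
HAP pool memory.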

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index f3ae9ad..a563c61 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -35,6 +35,7 @@
 #include <asm/setup.h>
 #include <asm/bzimage.h> /* for bzimage_parse */
 #include <asm/io_apic.h>
+#include <asm/hap.h>
 
 #include <public/version.h>
 
@@ -307,6 +308,151 @@ static void __init process_dom0_ioports_disable(void)
     }
 }
 
+static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
+                                       unsigned long mfn, unsigned long nr_mfns)
+{
+    unsigned long i;
+    for ( i = 0; i < nr_mfns; i++ )
+        if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) )
+            panic("Failed setting p2m. gfn:%lx mfn:%lx i:%ld\n", gfn, mfn, i);
+}
+
+/*
+ * Set the 1:1 map for all non-RAM regions for dom0. Thus, dom0 will have
+ * the entire I/O region mapped in the EPT/NPT.
+ *
+ * pvh fixme: The following doesn't map MMIO ranges when they sit above the
+ *            highest E820 covered address.
+ */
+static __init void pvh_map_all_iomem(struct domain *d)
+{
+    unsigned long start_pfn, end_pfn, end = 0, start = 0;
+    const struct e820entry *entry;
+    unsigned int i, nump;
+
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        end = entry->addr + entry->size;
+
+        if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ||
+             i == e820.nr_map - 1 )
+        {
+            start_pfn = PFN_DOWN(start);
+
+            /* Unused RAM areas are marked UNUSABLE, so skip them too */
+            if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE )
+                end_pfn = PFN_UP(entry->addr);
+            else
+                end_pfn = PFN_UP(end);
+
+            if ( start_pfn < end_pfn )
+            {
+                nump = end_pfn - start_pfn;
+                /* Add pages to the mapping */
+                pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+            }
+            start = end;
+        }
+    }
+
+    /* If the e820 ended under 4GB, we must map the remaining space up to 4GB */
+    if ( end < GB(4) )
+    {
+        start_pfn = PFN_UP(end);
+        end_pfn = (GB(4)) >> PAGE_SHIFT;
+        nump = end_pfn - start_pfn;
+        pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+    }
+}
+
+static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
+                                   unsigned long mfn, unsigned long vphysmap_s)
+{
+    if ( is_pvh_domain(d) )
+    {
+        int rc = guest_physmap_add_page(d, pfn, mfn, 0);
+        BUG_ON(rc);
+        return;
+    }
+    if ( !is_pv_32on64_domain(d) )
+        ((unsigned long *)vphysmap_s)[pfn] = mfn;
+    else
+        ((unsigned int *)vphysmap_s)[pfn] = mfn;
+
+    set_gpfn_from_mfn(mfn, pfn);
+}
+
+static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v,
+                                                 unsigned long v_start,
+                                                 unsigned long v_end)
+{
+    int i, j, k;
+    l4_pgentry_t *pl4e, *l4start;
+    l3_pgentry_t *pl3e;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+    unsigned long cr3_pfn;
+
+    ASSERT(paging_mode_enabled(v->domain));
+
+    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
+
+    /* Clear entries prior to guest L4 start */
+    pl4e = l4start + l4_table_offset(v_start);
+    memset(l4start, 0, (unsigned long)pl4e - (unsigned long)l4start);
+
+    for ( ; pl4e <= l4start + l4_table_offset(v_end - 1); pl4e++ )
+    {
+        pl3e = map_l3t_from_l4e(*pl4e);
+        for ( i = 0; i < PAGE_SIZE / sizeof(*pl3e); i++, pl3e++ )
+        {
+            if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+                continue;
+
+            pl2e = map_l2t_from_l3e(*pl3e);
+            for ( j = 0; j < PAGE_SIZE / sizeof(*pl2e); j++, pl2e++ )
+            {
+                if ( !(l2e_get_flags(*pl2e)  & _PAGE_PRESENT) )
+                    continue;
+
+                pl1e = map_l1t_from_l2e(*pl2e);
+                for ( k = 0; k < PAGE_SIZE / sizeof(*pl1e); k++, pl1e++ )
+                {
+                    if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
+                        continue;
+
+                    *pl1e = l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*pl1e)),
+                                         l1e_get_flags(*pl1e));
+                }
+                unmap_domain_page(pl1e);
+                *pl2e = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*pl2e)),
+                                     l2e_get_flags(*pl2e));
+            }
+            unmap_domain_page(pl2e);
+            *pl3e = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*pl3e)),
+                                 l3e_get_flags(*pl3e));
+        }
+        unmap_domain_page(pl3e);
+        *pl4e = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*pl4e)),
+                             l4e_get_flags(*pl4e));
+    }
+
+    /* Clear entries post guest L4. */
+    if ( (unsigned long)pl4e & (PAGE_SIZE - 1) )
+        memset(pl4e, 0, PAGE_SIZE - ((unsigned long)pl4e & (PAGE_SIZE - 1)));
+
+    unmap_domain_page(l4start);
+
+    cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3));
+    v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn);
+
+    /*
+     * Finally, we update the paging modes (hap_update_paging_modes). This will
+     * create monitor_table for us, update v->arch.cr3, and update vmcs.cr3.
+     */
+    paging_update_paging_modes(v);
+}
+
 static __init void mark_pv_pt_pages_rdonly(struct domain *d,
                                            l4_pgentry_t *l4start,
                                            unsigned long vpt_start,
@@ -516,6 +662,8 @@ int __init construct_dom0(
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
     l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+    paddr_t shared_info_paddr = 0;
+    u32 save_pvh_pg_mode = 0;
 
     /*
      * This fully describes the memory layout of the initial domain. All 
@@ -593,12 +741,21 @@ int __init construct_dom0(
         goto out;
     }
 
-    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
-         !test_bit(XENFEAT_dom0, parms.f_supported) )
+    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE )
     {
-        printk("Kernel does not support Dom0 operation\n");
-        rc = -EINVAL;
-        goto out;
+        if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
+        {
+            printk("Kernel does not support Dom0 operation\n");
+            rc = -EINVAL;
+            goto out;
+        }
+        if ( is_pvh_domain(d) &&
+             !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) )
+        {
+            printk("Kernel does not support PVH mode\n");
+            rc = -EINVAL;
+            goto out;
+        }
     }
 
     if ( compat32 )
@@ -663,6 +820,13 @@ int __init construct_dom0(
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
                         sizeof(struct dom0_vga_console_info));
+
+    if ( is_pvh_domain(d) )
+    {
+        shared_info_paddr = round_pgup(vstartinfo_end) - v_start;
+        vstartinfo_end   += PAGE_SIZE;
+    }
+
     vpt_start        = round_pgup(vstartinfo_end);
     for ( nr_pt_pages = 2; ; nr_pt_pages++ )
     {
@@ -903,6 +1067,13 @@ int __init construct_dom0(
         (void)alloc_vcpu(d, i, cpu);
     }
 
+    /*
+     * PVH: temporarily disable paging mode so that we can build the CR3
+     * needed to run on dom0's page tables.
+     */
+    save_pvh_pg_mode = d->arch.paging.mode;
+    d->arch.paging.mode = 0;
+
     /* Set up CR3 value for write_ptbase */
     if ( paging_mode_enabled(d) )
         paging_update_paging_modes(v);
@@ -969,6 +1140,15 @@ int __init construct_dom0(
                          nr_pages);
     }
 
+    if ( is_pvh_domain(d) )
+        hap_set_pvh_alloc_for_dom0(d, nr_pages);
+
+    /*
+     * Re-enable paging mode so that guest_physmap_add_page() does the
+     * right thing for us.
+     */
+    d->arch.paging.mode = save_pvh_pg_mode;
+
     /* Write the phys->machine and machine->phys table entries. */
     for ( pfn = 0; pfn < count; pfn++ )
     {
@@ -985,11 +1165,7 @@ int __init construct_dom0(
         if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) )
             mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
-        if ( !is_pv_32on64_domain(d) )
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-        else
-            ((unsigned int *)vphysmap_start)[pfn] = mfn;
-        set_gpfn_from_mfn(mfn, pfn);
+        dom0_update_physmap(d, pfn, mfn, vphysmap_start);
         if (!(pfn & 0xfffff))
             process_pending_softirqs();
     }
@@ -1005,8 +1181,8 @@ int __init construct_dom0(
             if ( !page->u.inuse.type_info &&
                  !get_page_and_type(page, d, PGT_writable_page) )
                 BUG();
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
             ++pfn;
             if (!(pfn & 0xfffff))
                 process_pending_softirqs();
@@ -1026,11 +1202,7 @@ int __init construct_dom0(
 #ifndef NDEBUG
 #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
 #endif
-            if ( !is_pv_32on64_domain(d) )
-                ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            else
-                ((unsigned int *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
 #undef pfn
             page++; pfn++;
             if (!(pfn & 0xfffff))
@@ -1054,6 +1226,15 @@ int __init construct_dom0(
         si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
     }
 
+    /*
+     * PVH: We need to update si->shared_info while we are on dom0 page
+     * tables, but must defer the p2m update until after the page tables
+     * have been fixed up for PVH, so that the m2p for the si pte entry
+     * returns the correct pfn.
+     */
+    if ( is_pvh_domain(d) )
+        si->shared_info = shared_info_paddr;
+
     if ( is_pv_32on64_domain(d) )
         xlat_start_info(si, XLAT_start_info_console_dom0);
 
@@ -1087,8 +1268,15 @@ int __init construct_dom0(
     regs->eflags = X86_EFLAGS_IF;
 
     if ( opt_dom0_shadow )
+    {
+        if ( is_pvh_domain(d) )
+        {
+            printk("Unsupported option dom0_shadow for PVH\n");
+            return -EINVAL;
+        }
         if ( paging_enable(d, PG_SH_enable) == 0 ) 
             paging_update_paging_modes(v);
+    }
 
     if ( supervisor_mode_kernel )
     {
@@ -1178,6 +1366,19 @@ int __init construct_dom0(
         printk(" Xen warning: dom0 kernel broken ELF: %s\n",
                elf_check_broken(&elf));
 
+    if ( is_pvh_domain(d) )
+    {
+        /* Finally, fix up the page tables, replacing mfns with pfns. */
+        pvh_fixup_page_tables_for_hap(v, v_start, v_end);
+
+        /* The PT now has the correct pfn for si; update the mfn in the p2m. */
+        mfn = virt_to_mfn(d->shared_info);
+        pfn = shared_info_paddr >> PAGE_SHIFT;
+        dom0_update_physmap(d, pfn, mfn, 0);
+
+        pvh_map_all_iomem(d);
+    }
+
     iommu_dom0_init(dom0);
     return 0;
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index d3f64bd..cc3ba66 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -579,6 +579,21 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
     }
 }
 
+void __init hap_set_pvh_alloc_for_dom0(struct domain *d,
+                                       unsigned long num_pages)
+{
+    int rc;
+    unsigned long memkb = num_pages * (PAGE_SIZE / 1024);
+
+    /* Copied from: libxl_get_required_shadow_memory() */
+    memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
+    num_pages = ( (memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
+    paging_lock(d);
+    rc = hap_set_allocation(d, num_pages, NULL);
+    paging_unlock(d);
+    BUG_ON(rc);
+}
+
 static const struct paging_mode hap_paging_real_mode;
 static const struct paging_mode hap_paging_protected_mode;
 static const struct paging_mode hap_paging_pae_mode;
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index e03f983..aab8558 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -63,6 +63,7 @@ int   hap_track_dirty_vram(struct domain *d,
                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
+void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages);
 
 #endif /* XEN_HAP_H */
 
-- 
1.7.2.3
