* [PATCH v2] xen: fix setup of PVH Dom0 memory map
From: Roger Pau Monne @ 2014-05-22 17:01 UTC
  To: xen-devel; +Cc: Tim Deegan, Jan Beulich, Roger Pau Monne

This patch adds the memory removed by MMIO holes back at the end of
the memory map for PVH Dom0, so the guest OS doesn't have to populate
this memory manually.

Also, provide a suitable e820 memory map for PVH Dom0 that matches
the underlying p2m map. This means that PVH guests should always use
XENMEM_memory_map in order to obtain the e820 map, even when running
as Dom0.
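
For illustration, a PVH guest would fetch the map along these lines (a
guest-side sketch using the public memory_op interface; the buffer
size and error handling are arbitrary and not part of this patch):

    struct e820entry map[128];          /* arbitrary buffer size */
    struct xen_memory_map memmap = {
        .nr_entries = ARRAY_SIZE(map),
    };
    int rc;

    set_xen_guest_handle(memmap.buffer, map);
    rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
    if ( rc )
        panic("unable to fetch e820 map from Xen: %d", rc);
    /* memmap.nr_entries now holds the number of valid entries. */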

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Tim Deegan <tim@xen.org>
Cc: Mukesh Rathor <mukesh.rathor@oracle.com>
---
Changes since v1:
 - Fix comparison of start_pfn and nr_pages to use < instead of <=.
 - Use memory pages from d->page_list in order to fill the holes in
   the memory map.
 - Use assignments instead of memcpy to create the Dom0 e820 map.
 - Make sure we are calculating the right size of the e820 entries by
   always rounding up the size.
---
 xen/arch/x86/domain_build.c |  107 +++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 38ed9f6..52c6fba 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -327,11 +327,14 @@ static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
  * pvh fixme: The following doesn't map MMIO ranges when they sit above the
  *            highest E820 covered address.
  */
-static __init void pvh_map_all_iomem(struct domain *d)
+static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
 {
     unsigned long start_pfn, end_pfn, end = 0, start = 0;
     const struct e820entry *entry;
-    unsigned int i, nump;
+    unsigned long nump, nmap, navail, mfn, nr_holes = 0;
+    unsigned int i;
+    struct page_info *page;
+    int rc;
 
     for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
     {
@@ -353,6 +356,9 @@ static __init void pvh_map_all_iomem(struct domain *d)
                 nump = end_pfn - start_pfn;
                 /* Add pages to the mapping */
                 pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+                if ( start_pfn < nr_pages )
+                    nr_holes += (end_pfn < nr_pages) ?
+                                    nump : (nr_pages - start_pfn);
             }
             start = end;
         }
@@ -369,6 +375,100 @@ static __init void pvh_map_all_iomem(struct domain *d)
         nump = end_pfn - start_pfn;
         pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
     }
+
+    /*
+     * Add the memory removed by the holes at the end of the
+     * memory map.
+     */
+    for ( i = 0, entry = e820.map; i < e820.nr_map && nr_holes > 0;
+          i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        end_pfn = PFN_UP(entry->addr + entry->size);
+        if ( end_pfn <= nr_pages )
+            continue;
+
+        navail = end_pfn - nr_pages;
+        nmap = navail > nr_holes ? nr_holes : navail;
+        nr_holes -= nmap;
+        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
+                        nr_pages : PFN_DOWN(entry->addr);
+        /*
+         * Populate this memory region using the pages
+         * previously removed by the MMIO holes.
+         */
+        page_list_for_each ( page, &d->page_list )
+        {
+            mfn = page_to_mfn(page);
+            if ( get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY )
+                continue;
+
+            rc = guest_physmap_add_page(d, start_pfn, mfn, 0);
+            if ( rc != 0 )
+                panic("Unable to add gpfn %#lx mfn %#lx to Dom0 physmap",
+                      start_pfn, page_to_mfn(page));
+            start_pfn++;
+            if ( --nmap == 0 )
+                break;
+        }
+        ASSERT(nmap == 0);
+    }
+
+    ASSERT(nr_holes == 0);
+}
+
+static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
+{
+    struct e820entry *entry, *entry_guest;
+    unsigned int i;
+    unsigned long pages, cur_pages = 0;
+
+    /*
+     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
+     */
+    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
+    if ( !d->arch.e820 )
+        panic("Unable to allocate memory for Dom0 e820 map");
+    entry_guest = d->arch.e820;
+
+    /* Clamp e820 memory map to match the memory assigned to Dom0 */
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+        {
+            *entry_guest = *entry;
+            goto next;
+        }
+
+        if ( nr_pages == cur_pages )
+        {
+            /*
+             * We already have all the assigned memory,
+             * skip this entry
+             */
+            continue;
+        }
+
+        *entry_guest = *entry;
+        pages = (entry_guest->size + PAGE_SIZE-1) >> PAGE_SHIFT;
+        if ( (cur_pages + pages) > nr_pages )
+        {
+            /* Truncate region */
+            entry_guest->size = (nr_pages - cur_pages) << PAGE_SHIFT;
+            cur_pages = nr_pages;
+        }
+        else
+        {
+            cur_pages += pages;
+        }
+next:
+        d->arch.nr_e820++;
+        entry_guest++;
+    }
+    ASSERT(cur_pages == nr_pages);
+    ASSERT(d->arch.nr_e820 <= e820.nr_map);
 }
 
 static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
@@ -1391,7 +1491,8 @@ int __init construct_dom0(
         pfn = shared_info_paddr >> PAGE_SHIFT;
         dom0_update_physmap(d, pfn, mfn, 0);
 
-        pvh_map_all_iomem(d);
+        pvh_map_all_iomem(d, nr_pages);
+        pvh_setup_e820(d, nr_pages);
     }
 
     if ( d->domain_id == hardware_domid )
-- 
1.7.7.5 (Apple Git-26)



* Re: [PATCH v2] xen: fix setup of PVH Dom0 memory map
From: Jan Beulich @ 2014-05-23  7:13 UTC
  To: Roger Pau Monne; +Cc: xen-devel, Tim Deegan

>>> On 22.05.14 at 19:01, <roger.pau@citrix.com> wrote:
> @@ -369,6 +375,100 @@ static __init void pvh_map_all_iomem(struct domain *d)
>          nump = end_pfn - start_pfn;
>          pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
>      }
> +
> +    /*
> +     * Add the memory removed by the holes at the end of the
> +     * memory map.
> +     */
> +    for ( i = 0, entry = e820.map; i < e820.nr_map && nr_holes > 0;
> +          i++, entry++ )
> +    {
> +        if ( entry->type != E820_RAM )
> +            continue;
> +
> +        end_pfn = PFN_UP(entry->addr + entry->size);
> +        if ( end_pfn <= nr_pages )
> +            continue;
> +
> +        navail = end_pfn - nr_pages;
> +        nmap = navail > nr_holes ? nr_holes : navail;

min()?

> +        nr_holes -= nmap;
> +        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
> +                        nr_pages : PFN_DOWN(entry->addr);

max_t()?
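
I.e., roughly (untested, using the usual min()/max_t() helpers):

        nmap = min(nr_holes, navail);
        nr_holes -= nmap;
        start_pfn = max_t(unsigned long, PFN_DOWN(entry->addr), nr_pages);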

> +        /*
> +         * Populate this memory region using the pages
> +         * previously removed by the MMIO holes.
> +         */
> +        page_list_for_each ( page, &d->page_list )
> +        {
> +            mfn = page_to_mfn(page);
> +            if ( get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY )
> +                continue;

This being inside another loop, this is going to be unnecessarily
expensive on big memory systems without dom0_mem=, as you're
going to repeatedly skip a huge (and increasing) number of pages
here. So apart from needing to add a process_pending_softirqs()
call somewhere, I think you should have a cursor into the list
(which isn't going to change under your feet).
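
Rough idea of what I mean (hypothetical helper, untested; the name is
mine):

    /* Return the next page at or after the cursor that has no gpfn
     * assigned yet, so callers resume the walk instead of restarting
     * it from the list head. */
    static struct page_info *__init next_unassigned_page(struct domain *d,
                                                         struct page_info *cur)
    {
        while ( cur != NULL )
        {
            if ( get_gpfn_from_mfn(page_to_mfn(cur)) == INVALID_M2P_ENTRY )
                return cur;
            cur = page_list_next(cur, &d->page_list);
            process_pending_softirqs();
        }
        return NULL;
    }

with the cursor initialized once from page_list_first(&d->page_list)
and kept across e820 entries.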

Talking of which, Mukesh, I think the lack of periodic calls to
process_pending_softirqs() is also a latent problem in the function
without Roger's changes. Furthermore, now that I look at it again, I
wonder whether this whole thing shouldn't be SRAT-based (when there is
an SRAT), in order to avoid mapping hotplug memory regions as MMIO.

> +
> +            rc = guest_physmap_add_page(d, start_pfn, mfn, 0);
> +            if ( rc != 0 )
> +                panic("Unable to add gpfn %#lx mfn %#lx to Dom0 physmap",
> +                      start_pfn, page_to_mfn(page));

Please print rc here too, to help diagnosing eventual problems. Also
please use "mfn" rather than "page_to_mfn(page)".

> +            start_pfn++;
> +            if ( --nmap == 0 )
> +                break;
> +        }
> +        ASSERT(nmap == 0);
> +    }
> +
> +    ASSERT(nr_holes == 0);
> +}
> +
> +static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
> +{
> +    struct e820entry *entry, *entry_guest;
> +    unsigned int i;
> +    unsigned long pages, cur_pages = 0;
> +
> +    /*
> +     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
> +     */
> +    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
> +    if ( !d->arch.e820 )
> +        panic("Unable to allocate memory for Dom0 e820 map");
> +    entry_guest = d->arch.e820;
> +
> +    /* Clamp e820 memory map to match the memory assigned to Dom0 */
> +    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
> +    {
> +        if ( entry->type != E820_RAM )
> +        {
> +            *entry_guest = *entry;
> +            goto next;
> +        }
> +
> +        if ( nr_pages == cur_pages )
> +        {
> +            /*
> +             * We already have all the assigned memory,
> +             * skip this entry
> +             */
> +            continue;
> +        }
> +
> +        *entry_guest = *entry;
> +        pages = (entry_guest->size + PAGE_SIZE-1) >> PAGE_SHIFT;

PFN_UP()?
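
I.e.:

        pages = PFN_UP(entry_guest->size);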

> +        if ( (cur_pages + pages) > nr_pages )
> +        {
> +            /* Truncate region */
> +            entry_guest->size = (nr_pages - cur_pages) << PAGE_SHIFT;
> +            cur_pages = nr_pages;
> +        }
> +        else
> +        {
> +            cur_pages += pages;
> +        }
> +next:

Labels indented by at least one space please (avoiding diff's -p option
picking this up as hunk context in future patches).
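
I.e.:

     next: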

Jan


* Re: [PATCH v2] xen: fix setup of PVH Dom0 memory map
From: Roger Pau Monné @ 2014-05-23 15:25 UTC
  To: Jan Beulich; +Cc: xen-devel, Tim Deegan

On 23/05/14 09:13, Jan Beulich wrote:
>>>> On 22.05.14 at 19:01, <roger.pau@citrix.com> wrote:
>> @@ -369,6 +375,100 @@ static __init void pvh_map_all_iomem(struct domain *d)
>>          nump = end_pfn - start_pfn;
>>          pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
>>      }
>> +
>> +    /*
>> +     * Add the memory removed by the holes at the end of the
>> +     * memory map.
>> +     */
>> +    for ( i = 0, entry = e820.map; i < e820.nr_map && nr_holes > 0;
>> +          i++, entry++ )
>> +    {
>> +        if ( entry->type != E820_RAM )
>> +            continue;
>> +
>> +        end_pfn = PFN_UP(entry->addr + entry->size);
>> +        if ( end_pfn <= nr_pages )
>> +            continue;
>> +
>> +        navail = end_pfn - nr_pages;
>> +        nmap = navail > nr_holes ? nr_holes : navail;
> 
> min()?
> 
>> +        nr_holes -= nmap;
>> +        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
>> +                        nr_pages : PFN_DOWN(entry->addr);
> 
> max_t()?

Fixed both.

>> +        /*
>> +         * Populate this memory region using the pages
>> +         * previously removed by the MMIO holes.
>> +         */
>> +        page_list_for_each ( page, &d->page_list )
>> +        {
>> +            mfn = page_to_mfn(page);
>> +            if ( get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY )
>> +                continue;
> 
> This being inside another loop, this is going to be unnecessarily
> expensive on big memory systems without dom0_mem=, as you're
> going to repeatedly skip a huge (and increasing) number of pages
> here. So apart from needing to add a process_pending_softirqs()
> call somewhere, I think you should have a cursor into the list
> (which isn't going to change under your feet).
> 
> Talking of which, Mukesh, I think the lack of periodic calls to
> process_pending_softirqs() is also a latent problem in the function
> without Roger's changes. Furthermore now that I look at it again I
> wonder whether this whole thing shouldn't be SRAT based (when
> there is an SRAT) in order to avoid mapping hotplug memory
> regions as MMIO.

I've added a call to process_pending_softirqs() in pvh_add_mem_mapping()
and also one here, in case we are dealing with big holes. I will leave
the SRAT work for later, since it's probably a much wider change.
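
Roughly what this amounts to (a sketch, not the exact v3 code; the
batching threshold is arbitrary):

    unsigned int n = 0;

    page_list_for_each ( page, &d->page_list )
    {
        /* Don't hog the CPU while walking a potentially huge list. */
        if ( !(++n & 0xff) )
            process_pending_softirqs();

        mfn = page_to_mfn(page);
        if ( get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY )
            continue;
        /* ... add mfn at start_pfn as in the patch ... */
    }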

>> +
>> +            rc = guest_physmap_add_page(d, start_pfn, mfn, 0);
>> +            if ( rc != 0 )
>> +                panic("Unable to add gpfn %#lx mfn %#lx to Dom0 physmap",
>> +                      start_pfn, page_to_mfn(page));
> 
> Please print rc here too, to help diagnosing eventual problems. Also
> please use "mfn" rather than "page_to_mfn(page)".

Done.

>> +            start_pfn++;
>> +            if ( --nmap == 0 )
>> +                break;
>> +        }
>> +        ASSERT(nmap == 0);
>> +    }
>> +
>> +    ASSERT(nr_holes == 0);
>> +}
>> +
>> +static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
>> +{
>> +    struct e820entry *entry, *entry_guest;
>> +    unsigned int i;
>> +    unsigned long pages, cur_pages = 0;
>> +
>> +    /*
>> +     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
>> +     */
>> +    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
>> +    if ( !d->arch.e820 )
>> +        panic("Unable to allocate memory for Dom0 e820 map");
>> +    entry_guest = d->arch.e820;
>> +
>> +    /* Clamp e820 memory map to match the memory assigned to Dom0 */
>> +    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
>> +    {
>> +        if ( entry->type != E820_RAM )
>> +        {
>> +            *entry_guest = *entry;
>> +            goto next;
>> +        }
>> +
>> +        if ( nr_pages == cur_pages )
>> +        {
>> +            /*
>> +             * We already have all the assigned memory,
>> +             * skip this entry
>> +             */
>> +            continue;
>> +        }
>> +
>> +        *entry_guest = *entry;
>> +        pages = (entry_guest->size + PAGE_SIZE-1) >> PAGE_SHIFT;
> 
> PFN_UP()?
> 
>> +        if ( (cur_pages + pages) > nr_pages )
>> +        {
>> +            /* Truncate region */
>> +            entry_guest->size = (nr_pages - cur_pages) << PAGE_SHIFT;
>> +            cur_pages = nr_pages;
>> +        }
>> +        else
>> +        {
>> +            cur_pages += pages;
>> +        }
>> +next:
> 
> Labels indented by at least one space please (avoiding diff's -p option
> picking this up as hunk context in future patches).

Both of the above comments are fixed, I'm going to send v3 now.

Roger.

