From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xen.org>
Subject: [PATCH v2] x86: debugging code for testing 16Tb support on smaller memory systems
Date: Wed, 23 Jan 2013 14:26:28 +0000
Message-ID: <5100012402000078000B8AB9@nat28.tlf.novell.com>
In-Reply-To: <50FE7EFD02000078000B8359@nat28.tlf.novell.com>


Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Removed unwanted bits and switched to a byte-granular "highmem-start"
    option.
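
Notes (with illustrative sketches, here and after the patch): the idea is to
let small-memory hosts exercise the 16Tb code paths. In debug builds the
mfn_to_virt() fast paths in domain_page.c are compiled out, so the mapcache
gets used even for low MFNs, and an optional "highmem-start" boundary makes
the boot allocator and xenheap behave as if memory above it were outside the
always-mapped 1:1 range. The option is parsed via size_param(), so the usual
size suffixes apply (e.g. "highmem-start=8G"). Note that the
"#ifndef highmem_start" in setup.c is deliberate: release builds get
highmem_start #defined to 0 by asm-x86/setup.h, and the sanitisation code
must not be compiled in that case.

A minimal stand-alone sketch of that sanitisation (GB() and
L3_PAGETABLE_SHIFT merely mirror the hypervisor's definitions; the sample
values are made up):

    #include <stdio.h>

    #define GB(n)              ((unsigned long)(n) << 30)
    #define L3_PAGETABLE_SHIFT 30   /* each L3 entry maps 1Gb */

    static unsigned long sanitise_highmem_start(unsigned long s)
    {
        if ( s < GB(4) )             /* don't allow a split below 4Gb */
            return 0;
        return s & ~((1UL << L3_PAGETABLE_SHIFT) - 1); /* 1Gb-align down */
    }

    int main(void)
    {
        /* "highmem-start=4608M" (4.5Gb) gets aligned down to 4Gb ... */
        printf("%#lx\n", sanitise_highmem_start(GB(4) + GB(1) / 2));
        /* ... while anything below 4Gb disables the split altogether. */
        printf("%#lx\n", sanitise_highmem_start(GB(2)));
        return 0;
    }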

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -546,6 +546,12 @@ Paging (HAP).
 ### hvm\_port80
 > `= <boolean>`
 
+### highmem-start
+> `= <size>`
+
+Specify the physical address boundary past which memory is treated as highmem
+(x86 debug hypervisor only).
+
 ### idle\_latency\_factor
 > `= <integer>`
 
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -66,8 +66,10 @@ void *map_domain_page(unsigned long mfn)
     struct mapcache_vcpu *vcache;
     struct vcpu_maphash_entry *hashent;
 
+#ifdef NDEBUG
     if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return mfn_to_virt(mfn);
+#endif
 
     v = mapcache_current_vcpu();
     if ( !v || is_hvm_vcpu(v) )
@@ -249,8 +251,10 @@ int mapcache_domain_init(struct domain *
     if ( is_hvm_domain(d) || is_idle_domain(d) )
         return 0;
 
+#ifdef NDEBUG
     if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return 0;
+#endif
 
     dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
     d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf);
@@ -418,8 +422,10 @@ void *map_domain_page_global(unsigned lo
 
     ASSERT(!in_irq() && local_irq_is_enabled());
 
+#ifdef NDEBUG
     if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return mfn_to_virt(mfn);
+#endif
 
     spin_lock(&globalmap_lock);
 
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -250,6 +250,14 @@ void __init init_frametable(void)
         init_spagetable();
 }
 
+#ifndef NDEBUG
+static unsigned int __read_mostly root_pgt_pv_xen_slots
+    = ROOT_PAGETABLE_PV_XEN_SLOTS;
+static l4_pgentry_t __read_mostly split_l4e;
+#else
+#define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
+#endif
+
 void __init arch_init_memory(void)
 {
     unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
@@ -344,6 +352,40 @@ void __init arch_init_memory(void)
     efi_init_memory();
 
     mem_sharing_init();
+
+#ifndef NDEBUG
+    if ( highmem_start )
+    {
+        unsigned long split_va = (unsigned long)__va(highmem_start);
+
+        if ( split_va < HYPERVISOR_VIRT_END &&
+             split_va - 1 == (unsigned long)__va(highmem_start - 1) )
+        {
+            root_pgt_pv_xen_slots = l4_table_offset(split_va) -
+                                    ROOT_PAGETABLE_FIRST_XEN_SLOT;
+            ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS);
+            if ( l4_table_offset(split_va) == l4_table_offset(split_va - 1) )
+            {
+                l3_pgentry_t *l3tab = alloc_xen_pagetable();
+
+                if ( l3tab )
+                {
+                    const l3_pgentry_t *l3idle =
+                        l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)]);
+
+                    for ( i = 0; i < l3_table_offset(split_va); ++i )
+                        l3tab[i] = l3idle[i];
+                    for ( ; i < L3_PAGETABLE_ENTRIES; ++i )
+                        l3tab[i] = l3e_empty();
+                    split_l4e = l4e_from_pfn(virt_to_mfn(l3tab),
+                                             __PAGE_HYPERVISOR);
+                }
+                else
+                    ++root_pgt_pv_xen_slots;
+            }
+        }
+    }
+#endif
 }
 
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
@@ -1320,7 +1362,12 @@ void init_guest_l4_table(l4_pgentry_t l4
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
-           ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t));
+           root_pgt_pv_xen_slots * sizeof(l4_pgentry_t));
+#ifndef NDEBUG
+    if ( l4e_get_intpte(split_l4e) )
+        l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots] =
+            split_l4e;
+#endif
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setu
 s8 __read_mostly xen_cpuidle = -1;
 boolean_param("cpuidle", xen_cpuidle);
 
+#ifndef NDEBUG
+unsigned long __initdata highmem_start;
+size_param("highmem-start", highmem_start);
+#endif
+
 cpumask_t __read_mostly cpu_present_map;
 
 unsigned long __read_mostly xen_phys_start;
@@ -787,6 +792,14 @@ void __init __start_xen(unsigned long mb
     modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
     bootstrap_map(NULL);
 
+#ifndef highmem_start
+    /* Don't allow split below 4Gb. */
+    if ( highmem_start < GB(4) )
+        highmem_start = 0;
+    else /* align to L3 entry boundary */
+        highmem_start &= ~((1UL << L3_PAGETABLE_SHIFT) - 1);
+#endif
+
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
@@ -915,6 +928,9 @@ void __init __start_xen(unsigned long mb
             /* Don't overlap with other modules. */
             end = consider_modules(s, e, size, mod, mbi->mods_count, j);
 
+            if ( highmem_start && end > highmem_start )
+                continue;
+
             if ( s < end &&
                  (headroom ||
                   ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
@@ -956,6 +972,8 @@ void __init __start_xen(unsigned long mb
     kexec_reserve_area(&boot_e820);
 
     setup_max_pdx();
+    if ( highmem_start )
+        xenheap_max_mfn(PFN_DOWN(highmem_start));
 
     /*
      * Walk every RAM region and map it in its entirety (on x86/64, at least)
@@ -1127,7 +1145,8 @@ void __init __start_xen(unsigned long mb
         unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
         uint64_t mask = PAGE_SIZE - 1;
 
-        xenheap_max_mfn(limit);
+        if ( !highmem_start )
+            xenheap_max_mfn(limit);
 
         /* Pass the remaining memory to the allocator. */
         for ( i = 0; i < boot_e820.nr_map; i++ )
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -45,6 +45,7 @@
 #include <asm/flushtlb.h>
 #ifdef CONFIG_X86
 #include <asm/p2m.h>
+#include <asm/setup.h> /* for highmem_start only */
 #else
 #define p2m_pod_offline_or_broken_hit(pg) 0
 #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg != NULL)
@@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages(
         pg = (r->e - nr_pfns) & ~(pfn_align - 1);
         if ( pg < r->s )
             continue;
+
+#if defined(CONFIG_X86) && !defined(NDEBUG)
+        /*
+         * Restrict this path to pfn_align == 1: the only allocations using
+         * a bigger alignment are the ones setting up the frame table chunks,
+         * and those get remapped anyway, so it is not a problem that they
+         * lack always-accessible 1:1 mappings.
+         */
+        if ( highmem_start && pfn_align == 1 &&
+             r->e > PFN_DOWN(highmem_start) )
+        {
+            pg = r->s;
+            if ( pg + nr_pfns > PFN_DOWN(highmem_start) )
+                continue;
+            r->s = pg + nr_pfns;
+            return pg;
+        }
+#endif
+
         _e = r->e;
         r->e = pg;
         bootmem_region_add(pg + nr_pfns, _e);
--- a/xen/include/asm-x86/setup.h
+++ b/xen/include/asm-x86/setup.h
@@ -43,4 +43,10 @@ void microcode_grab_module(
 
 extern uint8_t kbd_shift_flags;
 
+#ifdef NDEBUG
+# define highmem_start 0
+#else
+extern unsigned long highmem_start;
+#endif
+
 #endif
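
To illustrate the arch_init_memory() logic above: when the boundary does not
coincide with an L4 (512Gb) slot boundary of the 1:1 map, guests get fewer
full Xen L4 slots copied into their root table, plus one slot backed by a
partially populated L3 table. A rough stand-alone model of the slot
arithmetic (shifts and slot numbers match x86-64 paging; the
DIRECTMAP_VIRT_START value is only illustrative):

    #include <stdio.h>

    #define L4_SHIFT              39   /* 512Gb per L4 slot */
    #define L3_SHIFT              30   /* 1Gb per L3 entry */
    #define PT_ENTRIES            512
    #define FIRST_XEN_SLOT        256  /* ROOT_PAGETABLE_FIRST_XEN_SLOT */
    #define DIRECTMAP_VIRT_START  0xffff830000000000UL /* illustrative */

    static unsigned int l4_off(unsigned long va)
    { return (va >> L4_SHIFT) & (PT_ENTRIES - 1); }

    static unsigned int l3_off(unsigned long va)
    { return (va >> L3_SHIFT) & (PT_ENTRIES - 1); }

    int main(void)
    {
        unsigned long highmem_start = 5UL << 30;  /* "highmem-start=5G" */
        unsigned long split_va = DIRECTMAP_VIRT_START + highmem_start;

        printf("full Xen L4 slots: %u\n", l4_off(split_va) - FIRST_XEN_SLOT);
        if ( l4_off(split_va) == l4_off(split_va - 1) )
            /* Boundary falls inside a slot: only the L3 entries below it
             * remain populated in the guest's copy. */
            printf("split L3: first %u of %u entries kept\n",
                   l3_off(split_va), PT_ENTRIES);
        return 0;
    }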


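Similarly, the alloc_boot_pages() change switches ordinary (pfn_align == 1)
allocations from top-down to bottom-up, capped at the boundary, whenever a
bootmem region extends past highmem_start, so such allocations stay inside
the always-mapped 1:1 range. A toy model of the two strategies (the region
representation is simplified for illustration):

    #include <stdio.h>

    struct region { unsigned long s, e; };  /* PFN range [s, e) */

    /* Default behaviour: carve pages off the top of the region. */
    static unsigned long alloc_top(struct region *r, unsigned long nr)
    {
        unsigned long pg = r->e - nr;

        if ( pg < r->s )
            return 0;   /* doesn't fit */
        r->e = pg;
        return pg;
    }

    /* Debug behaviour for regions reaching past the boundary: carve off
     * the bottom, and give up (move on to another region) rather than
     * cross into highmem. */
    static unsigned long alloc_bottom(struct region *r, unsigned long nr,
                                      unsigned long highmem_pfn)
    {
        unsigned long pg = r->s;

        if ( pg + nr > highmem_pfn )
            return 0;
        r->s = pg + nr;
        return pg;
    }

    int main(void)
    {
        /* Hypothetical region covering 4Gb-20Gb, "highmem-start=8G". */
        struct region r = { 0x100000, 0x500000 };
        unsigned long highmem_pfn = 0x200000;

        printf("top-down:  %#lx\n", alloc_top(&r, 0x100));
        printf("bottom-up: %#lx\n", alloc_bottom(&r, 0x100, highmem_pfn));
        return 0;
    }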

