From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH v2] x86: debugging code for testing 16Tb support on smaller memory systems Date: Wed, 23 Jan 2013 14:26:28 +0000 Message-ID: <5100012402000078000B8AB9@nat28.tlf.novell.com> References: <50FE7BF502000078000B82F8@nat28.tlf.novell.com> <50FE7EFD02000078000B8359@nat28.tlf.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=__Part30019204.0__=" Return-path: In-Reply-To: <50FE7EFD02000078000B8359@nat28.tlf.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel List-Id: xen-devel@lists.xenproject.org This is a MIME message. If you are reading this text, you may want to consider changing to a mail reader or gateway that understands how to properly handle MIME multipart messages. --=__Part30019204.0__= Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Content-Disposition: inline Signed-off-by: Jan Beulich --- v2: Removed unwanted bits and switched to byte-granular "highmem-start" option. --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -546,6 +546,12 @@ Paging (HAP). ### hvm\_port80 > `=3D ` =20 +### highmem-start +> `=3D <size>` + +Specify the memory boundary past which memory will be treated as highmem = (x86 +debug hypervisor only). 
+ ### idle\_latency\_factor > `=3D ` =20 --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -66,8 +66,10 @@ void *map_domain_page(unsigned long mfn) struct mapcache_vcpu *vcache; struct vcpu_maphash_entry *hashent; =20 +#ifdef NDEBUG if ( mfn <=3D PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return mfn_to_virt(mfn); +#endif =20 v =3D mapcache_current_vcpu(); if ( !v || is_hvm_vcpu(v) ) @@ -249,8 +251,10 @@ int mapcache_domain_init(struct domain * if ( is_hvm_domain(d) || is_idle_domain(d) ) return 0; =20 +#ifdef NDEBUG if ( !mem_hotplug && max_page <=3D PFN_DOWN(__pa(HYPERVISOR_VIRT_END = - 1)) ) return 0; +#endif =20 dcache->l1tab =3D xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + = 1); d->arch.perdomain_l2_pg[MAPCACHE_SLOT] =3D alloc_domheap_page(NULL, = memf); @@ -418,8 +422,10 @@ void *map_domain_page_global(unsigned lo =20 ASSERT(!in_irq() && local_irq_is_enabled()); =20 +#ifdef NDEBUG if ( mfn <=3D PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return mfn_to_virt(mfn); +#endif =20 spin_lock(&globalmap_lock); =20 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -250,6 +250,14 @@ void __init init_frametable(void) init_spagetable(); } =20 +#ifndef NDEBUG +static unsigned int __read_mostly root_pgt_pv_xen_slots + =3D ROOT_PAGETABLE_PV_XEN_SLOTS; +static l4_pgentry_t __read_mostly split_l4e; +#else +#define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS +#endif + void __init arch_init_memory(void) { unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn; @@ -344,6 +352,40 @@ void __init arch_init_memory(void) efi_init_memory(); =20 mem_sharing_init(); + +#ifndef NDEBUG + if ( highmem_start ) + { + unsigned long split_va =3D (unsigned long)__va(highmem_start); + + if ( split_va < HYPERVISOR_VIRT_END && + split_va - 1 =3D=3D (unsigned long)__va(highmem_start - 1) ) + { + root_pgt_pv_xen_slots =3D l4_table_offset(split_va) - + ROOT_PAGETABLE_FIRST_XEN_SLOT; + ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS); + if ( 
l4_table_offset(split_va) =3D=3D l4_table_offset(split_va= - 1) ) + { + l3_pgentry_t *l3tab =3D alloc_xen_pagetable(); + + if ( l3tab ) + { + const l3_pgentry_t *l3idle =3D + l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)= ]); + + for ( i =3D 0; i < l3_table_offset(split_va); ++i ) + l3tab[i] =3D l3idle[i]; + for ( ; i <=3D L3_PAGETABLE_ENTRIES; ++i ) + l3tab[i] =3D l3e_empty(); + split_l4e =3D l4e_from_pfn(virt_to_mfn(l3tab), + __PAGE_HYPERVISOR); + } + else + ++root_pgt_pv_xen_slots; + } + } + } +#endif } =20 int page_is_ram_type(unsigned long mfn, unsigned long mem_type) @@ -1320,7 +1362,12 @@ void init_guest_l4_table(l4_pgentry_t l4 /* Xen private mappings. */ memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT], &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], - ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t)); + root_pgt_pv_xen_slots * sizeof(l4_pgentry_t)); +#ifndef NDEBUG + if ( l4e_get_intpte(split_l4e) ) + l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots] =3D + split_l4e; +#endif l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =3D l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =3D --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setu s8 __read_mostly xen_cpuidle =3D -1; boolean_param("cpuidle", xen_cpuidle); =20 +#ifndef NDEBUG +unsigned long __initdata highmem_start; +size_param("highmem-start", highmem_start); +#endif + cpumask_t __read_mostly cpu_present_map; =20 unsigned long __read_mostly xen_phys_start; @@ -787,6 +792,14 @@ void __init __start_xen(unsigned long mb modules_headroom =3D bzimage_headroom(bootstrap_map(mod), mod->mod_end= ); bootstrap_map(NULL); =20 +#ifndef highmem_start + /* Don't allow split below 4Gb. 
*/ + if ( highmem_start < GB(4) ) + highmem_start =3D 0; + else /* align to L3 entry boundary */ + highmem_start &=3D ~((1UL << L3_PAGETABLE_SHIFT) - 1); +#endif + for ( i =3D boot_e820.nr_map-1; i >=3D 0; i-- ) { uint64_t s, e, mask =3D (1UL << L2_PAGETABLE_SHIFT) - 1; @@ -915,6 +928,9 @@ void __init __start_xen(unsigned long mb /* Don't overlap with other modules. */ end =3D consider_modules(s, e, size, mod, mbi->mods_count, = j); =20 + if ( highmem_start && end > highmem_start ) + continue; + if ( s < end && (headroom || ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) ) @@ -956,6 +972,8 @@ void __init __start_xen(unsigned long mb kexec_reserve_area(&boot_e820); =20 setup_max_pdx(); + if ( highmem_start ) + xenheap_max_mfn(PFN_DOWN(highmem_start)); =20 /* * Walk every RAM region and map it in its entirety (on x86/64, at = least) @@ -1127,7 +1145,8 @@ void __init __start_xen(unsigned long mb unsigned long limit =3D virt_to_mfn(HYPERVISOR_VIRT_END - 1); uint64_t mask =3D PAGE_SIZE - 1; =20 - xenheap_max_mfn(limit); + if ( !highmem_start ) + xenheap_max_mfn(limit); =20 /* Pass the remaining memory to the allocator. */ for ( i =3D 0; i < boot_e820.nr_map; i++ ) --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -45,6 +45,7 @@ #include #ifdef CONFIG_X86 #include +#include /* for highmem_start only */ #else #define p2m_pod_offline_or_broken_hit(pg) 0 #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg !=3D NULL) @@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages( pg =3D (r->e - nr_pfns) & ~(pfn_align - 1); if ( pg < r->s ) continue; + +#if defined(CONFIG_X86) && !defined(NDEBUG) + /* + * Filtering pfn_align =3D=3D 1 since the only allocations using = a bigger + * alignment are the ones used for setting up the frame table = chunks. + * Those allocations get remapped anyway, i.e. them not having = 1:1 + * mappings always accessible is not a problem. 
+ */ + if ( highmem_start && pfn_align =3D=3D 1 && + r->e > PFN_DOWN(highmem_start) ) + { + pg =3D r->s; + if ( pg + nr_pfns > PFN_DOWN(highmem_start) ) + continue; + r->s =3D pg + nr_pfns; + return pg; + } +#endif + _e =3D r->e; r->e =3D pg; bootmem_region_add(pg + nr_pfns, _e); --- a/xen/include/asm-x86/setup.h +++ b/xen/include/asm-x86/setup.h @@ -43,4 +43,10 @@ void microcode_grab_module( =20 extern uint8_t kbd_shift_flags; =20 +#ifdef NDEBUG +# define highmem_start 0 +#else +extern unsigned long highmem_start; +#endif + #endif --=__Part30019204.0__= Content-Type: text/plain; name="x86-map-domain-debug.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="x86-map-domain-debug.patch" x86: debugging code for testing 16Tb support on smaller memory systems=0A= =0ASigned-off-by: Jan Beulich =0A---=0Av2: Removed = unwanted bits and switched to byte-granular "highmem-start"=0A = option.=0A=0A--- a/docs/misc/xen-command-line.markdown=0A+++ b/docs/misc/xe= n-command-line.markdown=0A@@ -546,6 +546,12 @@ Paging (HAP).=0A ### = hvm\_port80=0A > `=3D `=0A =0A+### highmem-start=0A+> `=3D = `=0A+=0A+Specify the memory boundary past which memory will be = treated as highmem (x86=0A+debug hypervisor only).=0A+=0A ### idle\_latency= \_factor=0A > `=3D `=0A =0A--- a/xen/arch/x86/domain_page.c=0A+++ = b/xen/arch/x86/domain_page.c=0A@@ -66,8 +66,10 @@ void *map_domain_page(uns= igned long mfn)=0A struct mapcache_vcpu *vcache;=0A struct = vcpu_maphash_entry *hashent;=0A =0A+#ifdef NDEBUG=0A if ( mfn <=3D = PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )=0A return mfn_to_virt(mfn= );=0A+#endif=0A =0A v =3D mapcache_current_vcpu();=0A if ( !v || = is_hvm_vcpu(v) )=0A@@ -249,8 +251,10 @@ int mapcache_domain_init(struct = domain *=0A if ( is_hvm_domain(d) || is_idle_domain(d) )=0A = return 0;=0A =0A+#ifdef NDEBUG=0A if ( !mem_hotplug && max_page <=3D = PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )=0A return 0;=0A+#endif=0A= =0A dcache->l1tab =3D 
xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIE= S + 1);=0A d->arch.perdomain_l2_pg[MAPCACHE_SLOT] =3D alloc_domheap_pag= e(NULL, memf);=0A@@ -418,8 +422,10 @@ void *map_domain_page_global(unsigned= lo=0A =0A ASSERT(!in_irq() && local_irq_is_enabled());=0A =0A+#ifdef = NDEBUG=0A if ( mfn <=3D PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )=0A = return mfn_to_virt(mfn);=0A+#endif=0A =0A spin_lock(&globalmap_lo= ck);=0A =0A--- a/xen/arch/x86/mm.c=0A+++ b/xen/arch/x86/mm.c=0A@@ -250,6 = +250,14 @@ void __init init_frametable(void)=0A init_spagetable();= =0A }=0A =0A+#ifndef NDEBUG=0A+static unsigned int __read_mostly root_pgt_p= v_xen_slots=0A+ =3D ROOT_PAGETABLE_PV_XEN_SLOTS;=0A+static l4_pgentry_t = __read_mostly split_l4e;=0A+#else=0A+#define root_pgt_pv_xen_slots = ROOT_PAGETABLE_PV_XEN_SLOTS=0A+#endif=0A+=0A void __init arch_init_memory(v= oid)=0A {=0A unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, = ioend_pfn;=0A@@ -344,6 +352,40 @@ void __init arch_init_memory(void)=0A = efi_init_memory();=0A =0A mem_sharing_init();=0A+=0A+#ifndef = NDEBUG=0A+ if ( highmem_start )=0A+ {=0A+ unsigned long = split_va =3D (unsigned long)__va(highmem_start);=0A+=0A+ if ( = split_va < HYPERVISOR_VIRT_END &&=0A+ split_va - 1 =3D=3D = (unsigned long)__va(highmem_start - 1) )=0A+ {=0A+ = root_pgt_pv_xen_slots =3D l4_table_offset(split_va) -=0A+ = ROOT_PAGETABLE_FIRST_XEN_SLOT;=0A+ ASSERT(root= _pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS);=0A+ if ( = l4_table_offset(split_va) =3D=3D l4_table_offset(split_va - 1) )=0A+ = {=0A+ l3_pgentry_t *l3tab =3D alloc_xen_pagetable();=0A= +=0A+ if ( l3tab )=0A+ {=0A+ = const l3_pgentry_t *l3idle =3D=0A+ l4e_to_l3e(idl= e_pg_table[l4_table_offset(split_va)]);=0A+=0A+ for ( i = =3D 0; i < l3_table_offset(split_va); ++i )=0A+ = l3tab[i] =3D l3idle[i];=0A+ for ( ; i <=3D L3_PAGETABLE_= ENTRIES; ++i )=0A+ l3tab[i] =3D l3e_empty();=0A+ = split_l4e =3D l4e_from_pfn(virt_to_mfn(l3tab),=0A+ = __PAGE_HYPERVISOR);=0A+ = }=0A+ else=0A+ 
++root_pgt_pv_xen_slots;= =0A+ }=0A+ }=0A+ }=0A+#endif=0A }=0A =0A int = page_is_ram_type(unsigned long mfn, unsigned long mem_type)=0A@@ -1320,7 = +1362,12 @@ void init_guest_l4_table(l4_pgentry_t l4=0A /* Xen private = mappings. */=0A memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],=0A = &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],=0A- = ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t));=0A+ = root_pgt_pv_xen_slots * sizeof(l4_pgentry_t));=0A+#ifndef NDEBUG=0A+ if = ( l4e_get_intpte(split_l4e) )=0A+ l4tab[ROOT_PAGETABLE_FIRST_XEN_SLO= T + root_pgt_pv_xen_slots] =3D=0A+ split_l4e;=0A+#endif=0A = l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =3D=0A l4e_from_pfn(do= main_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);=0A l4tab[l4_table_offs= et(PERDOMAIN_VIRT_START)] =3D=0A--- a/xen/arch/x86/setup.c=0A+++ b/xen/arch= /x86/setup.c=0A@@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setu= =0A s8 __read_mostly xen_cpuidle =3D -1;=0A boolean_param("cpuidle", = xen_cpuidle);=0A =0A+#ifndef NDEBUG=0A+unsigned long __initdata highmem_sta= rt;=0A+size_param("highmem-start", highmem_start);=0A+#endif=0A+=0A = cpumask_t __read_mostly cpu_present_map;=0A =0A unsigned long __read_mostly= xen_phys_start;=0A@@ -787,6 +792,14 @@ void __init __start_xen(unsigned = long mb=0A modules_headroom =3D bzimage_headroom(bootstrap_map(mod), = mod->mod_end);=0A bootstrap_map(NULL);=0A =0A+#ifndef highmem_start=0A+= /* Don't allow split below 4Gb. */=0A+ if ( highmem_start < GB(4) = )=0A+ highmem_start =3D 0;=0A+ else /* align to L3 entry = boundary */=0A+ highmem_start &=3D ~((1UL << L3_PAGETABLE_SHIFT) - = 1);=0A+#endif=0A+=0A for ( i =3D boot_e820.nr_map-1; i >=3D 0; i-- = )=0A {=0A uint64_t s, e, mask =3D (1UL << L2_PAGETABLE_SHIFT) = - 1;=0A@@ -915,6 +928,9 @@ void __init __start_xen(unsigned long mb=0A = /* Don't overlap with other modules. 
*/=0A end =3D = consider_modules(s, e, size, mod, mbi->mods_count, j);=0A =0A+ = if ( highmem_start && end > highmem_start )=0A+ continue;=0A= +=0A if ( s < end &&=0A (headroom ||=0A = ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )=0A@@ -956,6 = +972,8 @@ void __init __start_xen(unsigned long mb=0A kexec_reserve_are= a(&boot_e820);=0A =0A setup_max_pdx();=0A+ if ( highmem_start )=0A+ = xenheap_max_mfn(PFN_DOWN(highmem_start));=0A =0A /*=0A * = Walk every RAM region and map it in its entirety (on x86/64, at least)=0A@@= -1127,7 +1145,8 @@ void __init __start_xen(unsigned long mb=0A = unsigned long limit =3D virt_to_mfn(HYPERVISOR_VIRT_END - 1);=0A = uint64_t mask =3D PAGE_SIZE - 1;=0A =0A- xenheap_max_mfn(limit);=0A+= if ( !highmem_start )=0A+ xenheap_max_mfn(limit);=0A = =0A /* Pass the remaining memory to the allocator. */=0A = for ( i =3D 0; i < boot_e820.nr_map; i++ )=0A--- a/xen/common/page_alloc.c= =0A+++ b/xen/common/page_alloc.c=0A@@ -45,6 +45,7 @@=0A #include =0A #ifdef CONFIG_X86=0A #include =0A+#include /* for highmem_start only */=0A #else=0A #define p2m_pod_offline_or_brok= en_hit(pg) 0=0A #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg = !=3D NULL)=0A@@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages(=0A= pg =3D (r->e - nr_pfns) & ~(pfn_align - 1);=0A if ( pg < = r->s )=0A continue;=0A+=0A+#if defined(CONFIG_X86) && = !defined(NDEBUG)=0A+ /*=0A+ * Filtering pfn_align =3D=3D 1 = since the only allocations using a bigger=0A+ * alignment are the = ones used for setting up the frame table chunks.=0A+ * Those = allocations get remapped anyway, i.e. 
them not having 1:1=0A+ * = mappings always accessible is not a problem.=0A+ */=0A+ if = ( highmem_start && pfn_align =3D=3D 1 &&=0A+ r->e > PFN_DOWN(hi= ghmem_start) )=0A+ {=0A+ pg =3D r->s;=0A+ if = ( pg + nr_pfns > PFN_DOWN(highmem_start) )=0A+ continue;=0A+= r->s =3D pg + nr_pfns;=0A+ return pg;=0A+ = }=0A+#endif=0A+=0A _e =3D r->e;=0A r->e =3D pg;=0A = bootmem_region_add(pg + nr_pfns, _e);=0A--- a/xen/include/asm-x86/setup.h= =0A+++ b/xen/include/asm-x86/setup.h=0A@@ -43,4 +43,10 @@ void microcode_gr= ab_module(=0A =0A extern uint8_t kbd_shift_flags;=0A =0A+#ifdef NDEBUG=0A+#= define highmem_start 0=0A+#else=0A+extern unsigned long highmem_start;=0A+= #endif=0A+=0A #endif=0A --=__Part30019204.0__= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel --=__Part30019204.0__=--