All of lore.kernel.org
 help / color / mirror / Atom feed
* [GIT PULL] x86: add brk allocator for very early allocations
@ 2009-03-11 16:59 Jeremy Fitzhardinge
  2009-03-11 18:19 ` Yinghai Lu
  2009-03-11 19:20 ` Eric W. Biederman
  0 siblings, 2 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-11 16:59 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, the arch/x86 maintainers, Eric W. Biederman,
	Yinghai Lu, Linux Kernel Mailing List

Aggregate patch below.

The following changes since commit 11f5585820ae805c48f41c09bc260d0e51744792:
  Ingo Molnar (1):
        Merge branch 'tracing/ftrace'

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk

Jeremy Fitzhardinge (4):
      x86: make section delimiter symbols part of their section
      x86: add brk allocation for very, very early allocations
      x86-32: use brk segment for allocating initial kernel pagetable
      x86: use brk allocation for DMI

 arch/x86/include/asm/dmi.h        |   14 +-----
 arch/x86/include/asm/pgtable_32.h |    3 -
 arch/x86/include/asm/sections.h   |    7 +++
 arch/x86/include/asm/setup.h      |    7 ++-
 arch/x86/kernel/head32.c          |    5 +--
 arch/x86/kernel/head64.c          |    2 +-
 arch/x86/kernel/head_32.S         |   14 +++---
 arch/x86/kernel/setup.c           |   51 ++++++++++++++-------
 arch/x86/kernel/vmlinux_32.lds.S  |    9 +++-
 arch/x86/kernel/vmlinux_64.lds.S  |   90 ++++++++++++++++++++----------------
 arch/x86/lguest/boot.c            |    8 ---
 arch/x86/mm/pageattr.c            |    5 +-
 arch/x86/xen/mmu.c                |    6 +-
 13 files changed, 118 insertions(+), 103 deletions(-)

diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index bc68212..aa32f7e 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -2,21 +2,11 @@
 #define _ASM_X86_DMI_H
 
 #include <asm/io.h>
+#include <asm/setup.h>
 
-#define DMI_MAX_DATA 2048
-
-extern int dmi_alloc_index;
-extern char dmi_alloc_data[DMI_MAX_DATA];
-
-/* This is so early that there is no good way to allocate dynamic memory.
-   Allocate data in an BSS array. */
 static inline void *dmi_alloc(unsigned len)
 {
-	int idx = dmi_alloc_index;
-	if ((dmi_alloc_index + len) > DMI_MAX_DATA)
-		return NULL;
-	dmi_alloc_index += len;
-	return dmi_alloc_data + idx;
+	return extend_brk(len, sizeof(int));
 }
 
 /* Use early IO mappings for DMI because it's initialized early */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 97612fc..31bd120 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
  */
 #undef TEST_ACCESS_OK
 
-/* The boot page tables (all created as a single array) */
-extern unsigned long pg0[];
-
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 2b8c516..1b7ee5d 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -1 +1,8 @@
+#ifndef _ASM_X86_SECTIONS_H
+#define _ASM_X86_SECTIONS_H
+
 #include <asm-generic/sections.h>
+
+extern char __brk_base[], __brk_limit[];
+
+#endif	/* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 05c6f6b..366d366 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,14 +100,15 @@ extern struct boot_params boot_params;
  */
 #define LOWMEMSIZE()	(0x9f000)
 
+/* exceedingly early brk-like allocator */
+extern unsigned long _brk_end;
+void *extend_brk(size_t size, size_t align);
+
 #ifdef __i386__
 
 void __init i386_start_kernel(void);
 extern void probe_roms(void);
 
-extern unsigned long init_pg_tables_start;
-extern unsigned long init_pg_tables_end;
-
 #else
 void __init x86_64_start_kernel(char *real_mode);
 void __init x86_64_start_reservations(char *real_mode_data);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index ac108d1..3f8579f 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
 {
 	reserve_trampoline_memory();
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
@@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_early(init_pg_tables_start, init_pg_tables_end,
-			"INIT_PG_TABLE");
-
 	reserve_ebda_region();
 
 	/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f5b2722..70eaa85 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
 	reserve_trampoline_memory();
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6219259..d243437 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -167,7 +167,7 @@ num_subarch_entries = (. - subarch_entries) / 4
 /*
  * Initialize page tables.  This creates a PDE and a set of page
  * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * _brk_end is set up to point to the first "safe" location.
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
  *
@@ -190,8 +190,7 @@ default_entry:
 
 	xorl %ebx,%ebx				/* %ebx is kept at zero */
 
-	movl $pa(pg0), %edi
-	movl %edi, pa(init_pg_tables_start)
+	movl $pa(__brk_base), %edi
 	movl $pa(swapper_pg_pmd), %edx
 	movl $PTE_IDENT_ATTR, %eax
 10:
@@ -216,7 +215,8 @@ default_entry:
 	cmpl %ebp,%eax
 	jb 10b
 1:
-	movl %edi,pa(init_pg_tables_end)
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -227,8 +227,7 @@ default_entry:
 
 page_pde_offset = (__PAGE_OFFSET >> 20);
 
-	movl $pa(pg0), %edi
-	movl %edi, pa(init_pg_tables_start)
+	movl $pa(__brk_base), %edi
 	movl $pa(swapper_pg_dir), %edx
 	movl $PTE_IDENT_ATTR, %eax
 10:
@@ -249,7 +248,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,pa(init_pg_tables_end)
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ce9e888..b344908 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -114,6 +114,9 @@
 
 unsigned int boot_cpu_id __read_mostly;
 
+static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
+unsigned long _brk_end = (unsigned long)__brk_base;
+
 #ifdef CONFIG_X86_64
 int default_cpu_present_to_apicid(int mps_cpu)
 {
@@ -158,12 +161,6 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-/* This value is set up by the early boot code to point to the value
-   immediately after the boot time page tables.  It contains a *physical*
-   address, and must not be in the .bss segment! */
-unsigned long init_pg_tables_start __initdata = ~0UL;
-unsigned long init_pg_tables_end __initdata = ~0UL;
-
 static struct resource video_ram_resource = {
 	.name	= "Video RAM area",
 	.start	= 0xa0000,
@@ -219,12 +216,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
 int bootloader_type;
 
 /*
- * Early DMI memory
- */
-int dmi_alloc_index;
-char dmi_alloc_data[DMI_MAX_DATA];
-
-/*
  * Setup options
  */
 struct screen_info screen_info;
@@ -337,6 +328,34 @@ static void __init relocate_initrd(void)
 }
 #endif
 
+void * __init extend_brk(size_t size, size_t align)
+{
+	size_t mask = align - 1;
+	void *ret;
+
+	BUG_ON(_brk_start == 0);
+	BUG_ON(align & mask);
+
+	_brk_end = (_brk_end + mask) & ~mask;
+	BUG_ON((char *)(_brk_end + size) > __brk_limit);
+
+	ret = (void *)_brk_end;
+	_brk_end += size;
+
+	memset(ret, 0, size);
+
+	return ret;
+}
+
+static void __init reserve_brk(void)
+{
+	if (_brk_end > _brk_start)
+		reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
+
+	/* Mark brk area as locked down and no longer taking any new allocations */
+	_brk_start = 0;
+}
+
 static void __init reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -717,11 +736,7 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code = (unsigned long) _etext;
 	init_mm.end_data = (unsigned long) _edata;
-#ifdef CONFIG_X86_32
-	init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
-#else
-	init_mm.brk = (unsigned long) &_end;
-#endif
+	init_mm.brk = _brk_end;
 
 	code_resource.start = virt_to_phys(_text);
 	code_resource.end = virt_to_phys(_etext)-1;
@@ -842,6 +857,8 @@ void __init setup_arch(char **cmdline_p)
 	setup_bios_corruption_check();
 #endif
 
+	reserve_brk();
+
 	/* max_pfn_mapped is updated here */
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 0d86096..1063fbe 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,10 +189,13 @@ SECTIONS
 	*(.bss)
 	. = ALIGN(4);
 	__bss_stop = .;
-  	_end = . ;
-	/* This is where the kernel creates the early boot page tables */
+
 	. = ALIGN(PAGE_SIZE);
-	pg0 = . ;
+	__brk_base = . ;
+	. += 1024 * 1024 ;
+	__brk_limit = . ;
+
+  	_end = . ;
   }
 
   /* Sections to be discarded */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6..b8b83e4 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -29,8 +29,8 @@ SECTIONS
 {
   . = __START_KERNEL;
   phys_startup_64 = startup_64 - LOAD_OFFSET;
-  _text = .;			/* Text and read-only data */
   .text :  AT(ADDR(.text) - LOAD_OFFSET) {
+  	_text = .;			/* Text and read-only data */
 	/* First the code that has to be first for bootstrapping */
 	*(.text.head)
 	_stext = .;
@@ -61,13 +61,13 @@ SECTIONS
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
 	DATA_DATA
 	CONSTRUCTORS
+	_edata = .;			/* End of data section */
 	} :data
 
-  _edata = .;			/* End of data section */
 
-  . = ALIGN(PAGE_SIZE);
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
   .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
 	*(.data.cacheline_aligned)
   }
   . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
@@ -125,29 +125,29 @@ SECTIONS
 #undef VVIRT_OFFSET
 #undef VVIRT
 
-  . = ALIGN(THREAD_SIZE);	/* init_task */
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+	. = ALIGN(THREAD_SIZE);	/* init_task */
 	*(.data.init_task)
   }:data.init
 
-  . = ALIGN(PAGE_SIZE);
   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+	. = ALIGN(PAGE_SIZE);
 	*(.data.page_aligned)
   }
 
-  /* might get freed after init */
-  . = ALIGN(PAGE_SIZE);
-  __smp_alt_begin = .;
-  __smp_locks = .;
   .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+	/* might get freed after init */
+	. = ALIGN(PAGE_SIZE);
+	__smp_alt_begin = .;
+	__smp_locks = .;
 	*(.smp_locks)
+	__smp_locks_end = .;
+	. = ALIGN(PAGE_SIZE);
+	__smp_alt_end = .;
   }
-  __smp_locks_end = .;
-  . = ALIGN(PAGE_SIZE);
-  __smp_alt_end = .;
 
   . = ALIGN(PAGE_SIZE);		/* Init code and data */
-  __init_begin = .;
+  __init_begin = .;	/* paired with __init_end */
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
 	_sinittext = .;
 	INIT_TEXT
@@ -159,40 +159,42 @@ SECTIONS
 	__initdata_end = .;
    }
 
-  . = ALIGN(16);
-  __setup_start = .;
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
-  __setup_end = .;
-  __initcall_start = .;
+  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+	. = ALIGN(16);
+	__setup_start = .;
+	*(.init.setup)
+	__setup_end = .;
+  }
   .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+	__initcall_start = .;
 	INITCALLS
+	__initcall_end = .;
   }
-  __initcall_end = .;
-  __con_initcall_start = .;
   .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+	__con_initcall_start = .;
 	*(.con_initcall.init)
+	__con_initcall_end = .;
   }
-  __con_initcall_end = .;
-  __x86_cpu_dev_start = .;
   .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+	__x86_cpu_dev_start = .;
 	*(.x86_cpu_dev.init)
+	__x86_cpu_dev_end = .;
   }
-  __x86_cpu_dev_end = .;
   SECURITY_INIT
 
   . = ALIGN(8);
   .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-  __parainstructions = .;
+	__parainstructions = .;
        *(.parainstructions)
-  __parainstructions_end = .;
+	__parainstructions_end = .;
   }
 
-  . = ALIGN(8);
-  __alt_instructions = .;
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+	. = ALIGN(8);
+	__alt_instructions = .;
 	*(.altinstructions)
+	__alt_instructions_end = .;
   }
-  __alt_instructions_end = .;
   .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 	*(.altinstr_replacement)
   }
@@ -207,9 +209,11 @@ SECTIONS
 
 #ifdef CONFIG_BLK_DEV_INITRD
   . = ALIGN(PAGE_SIZE);
-  __initramfs_start = .;
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
-  __initramfs_end = .;
+  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+	__initramfs_start = .;
+	*(.init.ramfs)
+	__initramfs_end = .;
+  }
 #endif
 
 #ifdef CONFIG_SMP
@@ -229,20 +233,26 @@ SECTIONS
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
 
-  . = ALIGN(PAGE_SIZE);
-  __nosave_begin = .;
   .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-      *(.data.nosave)
+	. = ALIGN(PAGE_SIZE);
+	__nosave_begin = .;
+	*(.data.nosave)
+	. = ALIGN(PAGE_SIZE);
+	__nosave_end = .;
   } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
-  . = ALIGN(PAGE_SIZE);
-  __nosave_end = .;
 
-  __bss_start = .;		/* BSS */
   .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+	. = ALIGN(PAGE_SIZE);
+	__bss_start = .;		/* BSS */
 	*(.bss.page_aligned)
 	*(.bss)
-	}
-  __bss_stop = .;
+	__bss_stop = .;
+
+ 	. = ALIGN(PAGE_SIZE);
+ 	__brk_base = . ;
+ 	. += 1024 * 1024 ;
+ 	__brk_limit = . ;
+  }
 
   _end = . ;
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9fe4dda..90e44a1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1058,14 +1058,6 @@ __init void lguest_init(void)
 	 * lguest_init() where the rest of the fairly chaotic boot setup
 	 * occurs. */
 
-	/* The native boot code sets up initial page tables immediately after
-	 * the kernel itself, and sets init_pg_tables_end so they're not
-	 * clobbered.  The Launcher places our initial pagetables somewhere at
-	 * the top of our physical memory, so we don't need extra space: set
-	 * init_pg_tables_end to the end of the kernel. */
-	init_pg_tables_start = __pa(pg0);
-	init_pg_tables_end = __pa(pg0);
-
 	/* As described in head_32.S, we map the first 128M of memory. */
 	max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4629a87..8eb4eaa 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
@@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+	return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -700,7 +701,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * No need to redo, when the primary call touched the high
 	 * mapping already:
 	 */
-	if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
+	if (within(vaddr, (unsigned long) _text, _brk_end))
 		return 0;
 
 	/*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4..72f6a76 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 {
 	pmd_t *kernel_pmd;
 
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
+				  xen_start_info->nr_pt_frames * PAGE_SIZE +
+				  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
 	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-11 16:59 [GIT PULL] x86: add brk allocator for very early allocations Jeremy Fitzhardinge
@ 2009-03-11 18:19 ` Yinghai Lu
  2009-03-12 23:59   ` Jeremy Fitzhardinge
  2009-03-11 19:20 ` Eric W. Biederman
  1 sibling, 1 reply; 12+ messages in thread
From: Yinghai Lu @ 2009-03-11 18:19 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Jeremy Fitzhardinge wrote:
> Aggregate patch below.
> 
> The following changes since commit
> 11f5585820ae805c48f41c09bc260d0e51744792:
>  Ingo Molnar (1):
>        Merge branch 'tracing/ftrace'
> 
> are available in the git repository at:
> 
>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
> 
> Jeremy Fitzhardinge (4):
>      x86: make section delimiter symbols part of their section
>      x86: add brk allocation for very, very early allocations
>      x86-32: use brk segment for allocating initial kernel pagetable
>      x86: use brk allocation for DMI
> 
> arch/x86/include/asm/dmi.h        |   14 +-----
> arch/x86/include/asm/pgtable_32.h |    3 -
> arch/x86/include/asm/sections.h   |    7 +++
> arch/x86/include/asm/setup.h      |    7 ++-
> arch/x86/kernel/head32.c          |    5 +--
> arch/x86/kernel/head64.c          |    2 +-
> arch/x86/kernel/head_32.S         |   14 +++---
> arch/x86/kernel/setup.c           |   51 ++++++++++++++-------
> arch/x86/kernel/vmlinux_32.lds.S  |    9 +++-
> arch/x86/kernel/vmlinux_64.lds.S  |   90
> ++++++++++++++++++++----------------
> arch/x86/lguest/boot.c            |    8 ---
> arch/x86/mm/pageattr.c            |    5 +-
> arch/x86/xen/mmu.c                |    6 +-
> 13 files changed, 118 insertions(+), 103 deletions(-)
> 
> diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
> index bc68212..aa32f7e 100644
> --- a/arch/x86/include/asm/dmi.h
> +++ b/arch/x86/include/asm/dmi.h
> @@ -2,21 +2,11 @@
> #define _ASM_X86_DMI_H
> 
> #include <asm/io.h>
> +#include <asm/setup.h>
> 
> -#define DMI_MAX_DATA 2048
> -
> -extern int dmi_alloc_index;
> -extern char dmi_alloc_data[DMI_MAX_DATA];
> -
> -/* This is so early that there is no good way to allocate dynamic memory.
> -   Allocate data in an BSS array. */
> static inline void *dmi_alloc(unsigned len)
> {
> -    int idx = dmi_alloc_index;
> -    if ((dmi_alloc_index + len) > DMI_MAX_DATA)
> -        return NULL;
> -    dmi_alloc_index += len;
> -    return dmi_alloc_data + idx;
> +    return extend_brk(len, sizeof(int));
> }
> 
> /* Use early IO mappings for DMI because it's initialized early */
> diff --git a/arch/x86/include/asm/pgtable_32.h
> b/arch/x86/include/asm/pgtable_32.h
> index 97612fc..31bd120 100644
> --- a/arch/x86/include/asm/pgtable_32.h
> +++ b/arch/x86/include/asm/pgtable_32.h
> @@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long,
> pgprot_t);
>  */
> #undef TEST_ACCESS_OK
> 
> -/* The boot page tables (all created as a single array) */
> -extern unsigned long pg0[];
> -
> #ifdef CONFIG_X86_PAE
> # include <asm/pgtable-3level.h>
> #else
> diff --git a/arch/x86/include/asm/sections.h
> b/arch/x86/include/asm/sections.h
> index 2b8c516..1b7ee5d 100644
> --- a/arch/x86/include/asm/sections.h
> +++ b/arch/x86/include/asm/sections.h
> @@ -1 +1,8 @@
> +#ifndef _ASM_X86_SECTIONS_H
> +#define _ASM_X86_SECTIONS_H
> +
> #include <asm-generic/sections.h>
> +
> +extern char __brk_base[], __brk_limit[];
> +
> +#endif    /* _ASM_X86_SECTIONS_H */
> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
> index 05c6f6b..366d366 100644
> --- a/arch/x86/include/asm/setup.h
> +++ b/arch/x86/include/asm/setup.h
> @@ -100,14 +100,15 @@ extern struct boot_params boot_params;
>  */
> #define LOWMEMSIZE()    (0x9f000)
> 
> +/* exceedingly early brk-like allocator */
> +extern unsigned long _brk_end;
> +void *extend_brk(size_t size, size_t align);
> +
> #ifdef __i386__
> 
> void __init i386_start_kernel(void);
> extern void probe_roms(void);
> 
> -extern unsigned long init_pg_tables_start;
> -extern unsigned long init_pg_tables_end;
> -
> #else
> void __init x86_64_start_kernel(char *real_mode);
> void __init x86_64_start_reservations(char *real_mode_data);
> diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
> index ac108d1..3f8579f 100644
> --- a/arch/x86/kernel/head32.c
> +++ b/arch/x86/kernel/head32.c
> @@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
> {
>     reserve_trampoline_memory();
> 
> -    reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
> +    reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT
> DATA BSS");
> 
> #ifdef CONFIG_BLK_DEV_INITRD
>     /* Reserve INITRD */
> @@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
>         reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
>     }
> #endif
> -    reserve_early(init_pg_tables_start, init_pg_tables_end,
> -            "INIT_PG_TABLE");
> -
>     reserve_ebda_region();
> 
>     /*
> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> index f5b2722..70eaa85 100644
> --- a/arch/x86/kernel/head64.c
> +++ b/arch/x86/kernel/head64.c
> @@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char
> *real_mode_data)
> 
>     reserve_trampoline_memory();
> 
> -    reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
> +    reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT
> DATA BSS");
> 
> #ifdef CONFIG_BLK_DEV_INITRD
>     /* Reserve INITRD */
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index 6219259..d243437 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -167,7 +167,7 @@ num_subarch_entries = (. - subarch_entries) / 4
> /*
>  * Initialize page tables.  This creates a PDE and a set of page
>  * tables, which are located immediately beyond _end.  The variable
> - * init_pg_tables_end is set up to point to the first "safe" location.
> + * _brk_end is set up to point to the first "safe" location.
>  * Mappings are created both at virtual address 0 (identity mapping)
>  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
>  *
> @@ -190,8 +190,7 @@ default_entry:
> 
>     xorl %ebx,%ebx                /* %ebx is kept at zero */
> 
> -    movl $pa(pg0), %edi
> -    movl %edi, pa(init_pg_tables_start)
> +    movl $pa(__brk_base), %edi
>     movl $pa(swapper_pg_pmd), %edx
>     movl $PTE_IDENT_ATTR, %eax
> 10:
> @@ -216,7 +215,8 @@ default_entry:
>     cmpl %ebp,%eax
>     jb 10b
> 1:
> -    movl %edi,pa(init_pg_tables_end)
> +    addl $__PAGE_OFFSET, %edi
> +    movl %edi, pa(_brk_end)
>     shrl $12, %eax
>     movl %eax, pa(max_pfn_mapped)
> 
> @@ -227,8 +227,7 @@ default_entry:
> 
> page_pde_offset = (__PAGE_OFFSET >> 20);
> 
> -    movl $pa(pg0), %edi
> -    movl %edi, pa(init_pg_tables_start)
> +    movl $pa(__brk_base), %edi
>     movl $pa(swapper_pg_dir), %edx
>     movl $PTE_IDENT_ATTR, %eax
> 10:
> @@ -249,7 +248,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
>     leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>     cmpl %ebp,%eax
>     jb 10b
> -    movl %edi,pa(init_pg_tables_end)
> +    addl $__PAGE_OFFSET, %edi
> +    movl %edi, pa(_brk_end)
>     shrl $12, %eax
>     movl %eax, pa(max_pfn_mapped)
> 
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index ce9e888..b344908 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -114,6 +114,9 @@
> 
> unsigned int boot_cpu_id __read_mostly;
> 
> +static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
> +unsigned long _brk_end = (unsigned long)__brk_base;
> +
> #ifdef CONFIG_X86_64
> int default_cpu_present_to_apicid(int mps_cpu)
> {
> @@ -158,12 +161,6 @@ static struct resource bss_resource = {
> 
> 
> #ifdef CONFIG_X86_32
> -/* This value is set up by the early boot code to point to the value
> -   immediately after the boot time page tables.  It contains a *physical*
> -   address, and must not be in the .bss segment! */
> -unsigned long init_pg_tables_start __initdata = ~0UL;
> -unsigned long init_pg_tables_end __initdata = ~0UL;
> -
> static struct resource video_ram_resource = {
>     .name    = "Video RAM area",
>     .start    = 0xa0000,
> @@ -219,12 +216,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
> int bootloader_type;
> 
> /*
> - * Early DMI memory
> - */
> -int dmi_alloc_index;
> -char dmi_alloc_data[DMI_MAX_DATA];
> -
> -/*
>  * Setup options
>  */
> struct screen_info screen_info;
> @@ -337,6 +328,34 @@ static void __init relocate_initrd(void)
> }
> #endif
> 
> +void * __init extend_brk(size_t size, size_t align)
> +{
> +    size_t mask = align - 1;
> +    void *ret;
> +
> +    BUG_ON(_brk_start == 0);
> +    BUG_ON(align & mask);
> +
> +    _brk_end = (_brk_end + mask) & ~mask;
> +    BUG_ON((char *)(_brk_end + size) > __brk_limit);
> +
> +    ret = (void *)_brk_end;
> +    _brk_end += size;
> +
> +    memset(ret, 0, size);
> +
> +    return ret;
> +}
> +
> +static void __init reserve_brk(void)
> +{
> +    if (_brk_end > _brk_start)
> +        reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
> +
> +    /* Mark brk area as locked down and no longer taking any new
> allocations */
> +    _brk_start = 0;
> +}
> +
> static void __init reserve_initrd(void)
> {
>     u64 ramdisk_image = boot_params.hdr.ramdisk_image;
> @@ -717,11 +736,7 @@ void __init setup_arch(char **cmdline_p)
>     init_mm.start_code = (unsigned long) _text;
>     init_mm.end_code = (unsigned long) _etext;
>     init_mm.end_data = (unsigned long) _edata;
> -#ifdef CONFIG_X86_32
> -    init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
> -#else
> -    init_mm.brk = (unsigned long) &_end;
> -#endif
> +    init_mm.brk = _brk_end;
> 
>     code_resource.start = virt_to_phys(_text);
>     code_resource.end = virt_to_phys(_etext)-1;
> @@ -842,6 +857,8 @@ void __init setup_arch(char **cmdline_p)
>     setup_bios_corruption_check();
> #endif
> 
> +    reserve_brk();
> +
>     /* max_pfn_mapped is updated here */
>     max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
>     max_pfn_mapped = max_low_pfn_mapped;
> diff --git a/arch/x86/kernel/vmlinux_32.lds.S
> b/arch/x86/kernel/vmlinux_32.lds.S
> index 0d86096..1063fbe 100644
> --- a/arch/x86/kernel/vmlinux_32.lds.S
> +++ b/arch/x86/kernel/vmlinux_32.lds.S
> @@ -189,10 +189,13 @@ SECTIONS
>     *(.bss)
>     . = ALIGN(4);
>     __bss_stop = .;
> -      _end = . ;
> -    /* This is where the kernel creates the early boot page tables */
> +
>     . = ALIGN(PAGE_SIZE);
> -    pg0 = . ;
> +    __brk_base = . ;
> +    . += 1024 * 1024 ;
> +    __brk_limit = . ;

could have more explanation about the 1M size,
because initial_pg_tables will sit in it. Please consider adding something like

in head_32.S

 LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
 
 #if PTRS_PER_PMD > 1
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
 #else
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
 #endif
 ALLOCATOR_SLOP = 4
 

INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm

...


+
+.section ".bss.extra_page_aligned","wa"
+       .align PAGE_SIZE_asm
+       .fill INIT_MAP_SIZE,1,0

@@ -205,6 +208,12 @@ SECTIONS
   DWARF_DEBUG
 }
 
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+       "kernel image bigger than KERNEL_IMAGE_SIZE")
+
 #ifdef CONFIG_KEXEC
 /* Link time checks */
 #include <asm/kexec.h>
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
 #define __VIRTUAL_MASK_SHIFT   32
 #endif /* CONFIG_X86_PAE */
 
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE      (512 * 1024 * 1024)
+

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-11 16:59 [GIT PULL] x86: add brk allocator for very early allocations Jeremy Fitzhardinge
  2009-03-11 18:19 ` Yinghai Lu
@ 2009-03-11 19:20 ` Eric W. Biederman
  2009-03-11 23:53   ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 12+ messages in thread
From: Eric W. Biederman @ 2009-03-11 19:20 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Yinghai Lu, Linux Kernel Mailing List

Jeremy Fitzhardinge <jeremy@goop.org> writes:

> Aggregate patch below.
>
> The following changes since commit 11f5585820ae805c48f41c09bc260d0e51744792:
>  Ingo Molnar (1):
>        Merge branch 'tracing/ftrace'
>
> are available in the git repository at:
>
>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>
> Jeremy Fitzhardinge (4):
>      x86: make section delimiter symbols part of their section
>      x86: add brk allocation for very, very early allocations
>      x86-32: use brk segment for allocating initial kernel pagetable
>      x86: use brk allocation for DMI

Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>

extend_brk is the wrong way to go.  We already have a better mechanism.
find_e820_early paired with reserve_early.

Allocating the early page tables is a very special case.   There is
a case for cleaning up that mechanism and making it more comprehensible.
We should not be generalizing it, and making the kernel more fragile.

Overall I think there is a lot of good work in the patch, but taken
as a whole it seems to be moving us in the wrong direction.

Eric

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-11 19:20 ` Eric W. Biederman
@ 2009-03-11 23:53   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-11 23:53 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Yinghai Lu, Linux Kernel Mailing List

Eric W. Biederman wrote:
> extend_brk is the wrong way to go.  We already have a better mechanism.
> find_e820_early paired with reserve_early.
>   

No doubt that's a better option when available.  But I want to allocate 
earlier than that.

> Allocating the early page tables are a very special case.   There is
> a case for cleaning up that mechanism and making more comprehensible.
> We should not be generalizing it, and making the kernel more fragile.
>   

More fragile?  I don't see that extend_brk() is a particularly fragile 
mechanism.  I guess a user could start over-using it and running out of 
the initial space.  That would fail in a fairly unambiguous way (there's 
a BUG_ON checking for an attempt to extend beyond __brk_limit), and 
would definitely be an abuse of the call.

My motivation for this patch is to dynamically allocate things very 
early, before the e820 map is available.  Specifically, I want to 
dynamically allocate various Xen datastructures which would otherwise 
statically waste space in the bss (in the case where you're running a 
Xen-enabled kernel on a non-Xen system).  It also allows me to scale 
them according to the memory size, etc.  I need to do this before e820 
is available; indeed, I need to do it to synthesize an appropriate e820 
map for the kernel to consume.

It is also nice that it generalizes head_32.S's pagetable construction, 
and mops up the bespoke DMI allocator.  There may well be some other 
potential users.  I think any static data in bss is fair game, 
particularly if it is only used conditionally.

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-11 18:19 ` Yinghai Lu
@ 2009-03-12 23:59   ` Jeremy Fitzhardinge
  2009-03-13  0:44     ` Yinghai Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-12 23:59 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Yinghai Lu wrote:
> could have more explanation about the 1M size.
> because initial_pg_tables will sit in it. please consider to add something like
>
> in head_32.S
>
>  LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>  
>  #if PTRS_PER_PMD > 1
>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>  #else
>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>  #endif
>  ALLOCATOR_SLOP = 4
>   

OK, how does this look:

The following changes since commit 21e8ba72daf5d7f0af33968f873499c85f96ccef:
  Jeremy Fitzhardinge (1):
        x86: use brk allocation for DMI

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk

Jeremy Fitzhardinge (1):
      x86: allow extend_brk users to reserve brk space

Yinghai Lu (1):
      x86-32: compute initial mapping size more accurately

 arch/x86/include/asm/page_32_types.h |    5 +++++
 arch/x86/include/asm/setup.h         |   30 ++++++++++++++++++++++++++++++
 arch/x86/kernel/head_32.S            |    4 +++-
 arch/x86/kernel/setup.c              |    2 ++
 arch/x86/kernel/vmlinux_32.lds.S     |    4 +++-
 arch/x86/kernel/vmlinux_64.lds.S     |    4 +++-
 6 files changed, 46 insertions(+), 3 deletions(-)

git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f1e4a79..0f915ae 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
 #define __VIRTUAL_MASK_SHIFT	32
 #endif	/* CONFIG_X86_PAE */
 
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 366d366..61b126b 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -104,6 +104,29 @@ extern struct boot_params boot_params;
 extern unsigned long _brk_end;
 void *extend_brk(size_t size, size_t align);
 
+/*
+ * Reserve space in the brk section.  The name must be unique within
+ * the file, and somewhat descriptive.  The size is in bytes.  Must be
+ * used at file scope.
+ *
+ * (This uses a temp function to wrap the asm so we can pass it the
+ * size parameter; otherwise we wouldn't be able to.  We can't use a
+ * "section" attribute on a normal variable because it always ends up
+ * being @progbits, which ends up allocating space in the vmlinux
+ * executable.)
+ */
+#define RESERVE_BRK(name,sz)						\
+	static void __section(.discard) __used			\
+	__brk_reservation_fn_##name##__(void) {				\
+		asm volatile (						\
+			".pushsection .brk_reservation,\"aw\",@nobits;" \
+			"__brk_reservation_" #name "__:"		\
+			" 1:.skip %c0;"					\
+			" .size __brk_reservation_" #name "__, . - 1b;"	\
+			" .popsection"					\
+			: : "i" (sz));					\
+	}
+
 #ifdef __i386__
 
 void __init i386_start_kernel(void);
@@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char *real_mode_data);
 
 #endif /* __i386__ */
 #endif /* _SETUP */
+#else
+#define RESERVE_BRK(name,sz)				\
+	.pushsection .brk_reservation,"aw",@nobits;	\
+__brk_reservation_##name##__:				\
+1:	.skip sz;					\
+	.size __brk_reservation_##name##__,.-1b;	\
+	.popsection
 #endif /* __ASSEMBLY__ */
 #endif  /*  __KERNEL__  */
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d243437..80dc05e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -54,7 +54,7 @@
  *
  * This should be a multiple of a page.
  */
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
 
 /*
  * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
@@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
 
 INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
 
+RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
+
 /*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
  * %esi points to the real-mode code as a 32-bit pointer.
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b344908..d633958 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,8 @@
 #define ARCH_SETUP
 #endif
 
+RESERVE_BRK(dmi_alloc, 65536);
+
 unsigned int boot_cpu_id __read_mostly;
 
 static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 1063fbe..4005279 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -192,7 +192,8 @@ SECTIONS
 
 	. = ALIGN(PAGE_SIZE);
 	__brk_base = . ;
-	. += 1024 * 1024 ;
+ 	. += 64 * 1024 ;	/* 64k slop space */
+	*(.brk_reservation)	/* areas brk users have reserved */
 	__brk_limit = . ;
 
   	_end = . ;
@@ -201,6 +202,7 @@ SECTIONS
   /* Sections to be discarded */
   /DISCARD/ : {
 	*(.exitcall.exit)
+	*(.discard)
 	}
 
   STABS_DEBUG
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index b8b83e4..47deee3 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -250,7 +250,8 @@ SECTIONS
 
  	. = ALIGN(PAGE_SIZE);
  	__brk_base = . ;
- 	. += 1024 * 1024 ;
+ 	. += 64 * 1024 ;	/* 64k slop space */
+	*(.brk_reservation)	/* areas brk users have reserved */
  	__brk_limit = . ;
   }
 
@@ -260,6 +261,7 @@ SECTIONS
   /DISCARD/ : {
 	*(.exitcall.exit)
 	*(.eh_frame)
+	*(.discard)
 	}
 
   STABS_DEBUG



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-12 23:59   ` Jeremy Fitzhardinge
@ 2009-03-13  0:44     ` Yinghai Lu
  2009-03-13 20:27       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 12+ messages in thread
From: Yinghai Lu @ 2009-03-13  0:44 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Jeremy Fitzhardinge wrote:
> Yinghai Lu wrote:
>> could have more explanation about the 1M size.
>> because initial_pg_tables will sit in it. please consider to add
>> something like
>>
>> in head_32.S
>>
>>  LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>  
>>  #if PTRS_PER_PMD > 1
>>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>>  #else
>>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>>  #endif
>>  ALLOCATOR_SLOP = 4
>>   
> 
> OK, how does this look:
> 
> The following changes since commit
> 21e8ba72daf5d7f0af33968f873499c85f96ccef:
>  Jeremy Fitzhardinge (1):
>        x86: use brk allocation for DMI
> 
> are available in the git repository at:
> 
>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
> 
> Jeremy Fitzhardinge (1):
>      x86: allow extend_brk users to reserve brk space
> 
> Yinghai Lu (1):
>      x86-32: compute initial mapping size more accurately
> 
> arch/x86/include/asm/page_32_types.h |    5 +++++
> arch/x86/include/asm/setup.h         |   30 ++++++++++++++++++++++++++++++
> arch/x86/kernel/head_32.S            |    4 +++-
> arch/x86/kernel/setup.c              |    2 ++
> arch/x86/kernel/vmlinux_32.lds.S     |    4 +++-
> arch/x86/kernel/vmlinux_64.lds.S     |    4 +++-
> 6 files changed, 46 insertions(+), 3 deletions(-)
> 
> git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
> diff --git a/arch/x86/include/asm/page_32_types.h
> b/arch/x86/include/asm/page_32_types.h
> index f1e4a79..0f915ae 100644
> --- a/arch/x86/include/asm/page_32_types.h
> +++ b/arch/x86/include/asm/page_32_types.h
> @@ -39,6 +39,11 @@
> #define __VIRTUAL_MASK_SHIFT    32
> #endif    /* CONFIG_X86_PAE */
> 
> +/*
> + * Kernel image size is limited to 512 MB (see in
> arch/x86/kernel/head_32.S)
> + */
> +#define KERNEL_IMAGE_SIZE    (512 * 1024 * 1024)
> +
> #ifndef __ASSEMBLY__
> 
> /*
> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
> index 366d366..61b126b 100644
> --- a/arch/x86/include/asm/setup.h
> +++ b/arch/x86/include/asm/setup.h
> @@ -104,6 +104,29 @@ extern struct boot_params boot_params;
> extern unsigned long _brk_end;
> void *extend_brk(size_t size, size_t align);
> 
> +/*
> + * Reserve space in the brk section.  The name must be unique within
> + * the file, and somewhat descriptive.  The size is in bytes.  Must be
> + * used at file scope.
> + *
> + * (This uses a temp function to wrap the asm so we can pass it the
> + * size parameter; otherwise we wouldn't be able to.  We can't use a
> + * "section" attribute on a normal variable because it always ends up
> + * being @progbits, which ends up allocating space in the vmlinux
> + * executable.)
> + */
> +#define RESERVE_BRK(name,sz)                        \
> +    static void __section(.discard) __used            \
> +    __brk_reservation_fn_##name##__(void) {                \
> +        asm volatile (                        \
> +            ".pushsection .brk_reservation,\"aw\",@nobits;" \
> +            "__brk_reservation_" #name "__:"        \
> +            " 1:.skip %c0;"                    \
> +            " .size __brk_reservation_" #name "__, . - 1b;"    \
> +            " .popsection"                    \
> +            : : "i" (sz));                    \
> +    }
> +
> #ifdef __i386__
> 
> void __init i386_start_kernel(void);
> @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char
> *real_mode_data);
> 
> #endif /* __i386__ */
> #endif /* _SETUP */
> +#else
> +#define RESERVE_BRK(name,sz)                \
> +    .pushsection .brk_reservation,"aw",@nobits;    \
> +__brk_reservation_##name##__:                \
> +1:    .skip sz;                    \
> +    .size __brk_reservation_##name##__,.-1b;    \
> +    .popsection
> #endif /* __ASSEMBLY__ */
> #endif  /*  __KERNEL__  */
> 
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index d243437..80dc05e 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -54,7 +54,7 @@
>  *
>  * This should be a multiple of a page.
>  */
> -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
> +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
> 
> /*
>  * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
> @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
> 
> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm

no user for INIT_MAP_BEYOND_END any more.

> 
> +RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
> +
> /*
>  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
>  * %esi points to the real-mode code as a 32-bit pointer.
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index b344908..d633958 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -112,6 +112,8 @@
> #define ARCH_SETUP
> #endif
> 
> +RESERVE_BRK(dmi_alloc, 65536);
> +
> unsigned int boot_cpu_id __read_mostly;
> 
> static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
> diff --git a/arch/x86/kernel/vmlinux_32.lds.S
> b/arch/x86/kernel/vmlinux_32.lds.S
> index 1063fbe..4005279 100644
> --- a/arch/x86/kernel/vmlinux_32.lds.S
> +++ b/arch/x86/kernel/vmlinux_32.lds.S
> @@ -192,7 +192,8 @@ SECTIONS
> 
>     . = ALIGN(PAGE_SIZE);
>     __brk_base = . ;
> -    . += 1024 * 1024 ;
> +     . += 64 * 1024 ;    /* 64k slop space */
> +    *(.brk_reservation)    /* areas brk users have reserved */
>     __brk_limit = . ;
> 
>       _end = . ;
> @@ -201,6 +202,7 @@ SECTIONS
>   /* Sections to be discarded */
>   /DISCARD/ : {
>     *(.exitcall.exit)
> +    *(.discard)
>     }
> 
>   STABS_DEBUG

could add 
/*
 * Build-time check on the image size:
 */
ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
        "kernel image bigger than KERNEL_IMAGE_SIZE")


for 32bit

YH

> diff --git a/arch/x86/kernel/vmlinux_64.lds.S
> b/arch/x86/kernel/vmlinux_64.lds.S
> index b8b83e4..47deee3 100644
> --- a/arch/x86/kernel/vmlinux_64.lds.S
> +++ b/arch/x86/kernel/vmlinux_64.lds.S
> @@ -250,7 +250,8 @@ SECTIONS
> 
>      . = ALIGN(PAGE_SIZE);
>      __brk_base = . ;
> -     . += 1024 * 1024 ;
> +     . += 64 * 1024 ;    /* 64k slop space */
> +    *(.brk_reservation)    /* areas brk users have reserved */
>      __brk_limit = . ;
>   }
> 
> @@ -260,6 +261,7 @@ SECTIONS
>   /DISCARD/ : {
>     *(.exitcall.exit)
>     *(.eh_frame)
> +    *(.discard)
>     }
> 
>   STABS_DEBUG
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-13  0:44     ` Yinghai Lu
@ 2009-03-13 20:27       ` Jeremy Fitzhardinge
  2009-03-13 21:03         ` Yinghai Lu
  2009-03-13 22:45         ` H. Peter Anvin
  0 siblings, 2 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-13 20:27 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Yinghai Lu wrote:
> Jeremy Fitzhardinge wrote:
>   
>> Yinghai Lu wrote:
>>     
>>> could have more explanation about the 1M size.
>>> because initial_pg_tables will sit in it. please consider to add
>>> something like
>>>
>>> in head_32.S
>>>
>>>  LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>>  
>>>  #if PTRS_PER_PMD > 1
>>>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>>>  #else
>>>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>>>  #endif
>>>  ALLOCATOR_SLOP = 4
>>>   
>>>       
>> OK, how does this look:
>>
>> The following changes since commit
>> 21e8ba72daf5d7f0af33968f873499c85f96ccef:
>>  Jeremy Fitzhardinge (1):
>>        x86: use brk allocation for DMI
>>
>> are available in the git repository at:
>>
>>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>>
>> Jeremy Fitzhardinge (1):
>>      x86: allow extend_brk users to reserve brk space
>>
>> Yinghai Lu (1):
>>      x86-32: compute initial mapping size more accurately
>>
>> arch/x86/include/asm/page_32_types.h |    5 +++++
>> arch/x86/include/asm/setup.h         |   30 ++++++++++++++++++++++++++++++
>> arch/x86/kernel/head_32.S            |    4 +++-
>> arch/x86/kernel/setup.c              |    2 ++
>> arch/x86/kernel/vmlinux_32.lds.S     |    4 +++-
>> arch/x86/kernel/vmlinux_64.lds.S     |    4 +++-
>> 6 files changed, 46 insertions(+), 3 deletions(-)
>>
>> git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
>> diff --git a/arch/x86/include/asm/page_32_types.h
>> b/arch/x86/include/asm/page_32_types.h
>> index f1e4a79..0f915ae 100644
>> --- a/arch/x86/include/asm/page_32_types.h
>> +++ b/arch/x86/include/asm/page_32_types.h
>> @@ -39,6 +39,11 @@
>> #define __VIRTUAL_MASK_SHIFT    32
>> #endif    /* CONFIG_X86_PAE */
>>
>> +/*
>> + * Kernel image size is limited to 512 MB (see in
>> arch/x86/kernel/head_32.S)
>> + */
>> +#define KERNEL_IMAGE_SIZE    (512 * 1024 * 1024)
>> +
>> #ifndef __ASSEMBLY__
>>
>> /*
>> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
>> index 366d366..61b126b 100644
>> --- a/arch/x86/include/asm/setup.h
>> +++ b/arch/x86/include/asm/setup.h
>> @@ -104,6 +104,29 @@ extern struct boot_params boot_params;
>> extern unsigned long _brk_end;
>> void *extend_brk(size_t size, size_t align);
>>
>> +/*
>> + * Reserve space in the brk section.  The name must be unique within
>> + * the file, and somewhat descriptive.  The size is in bytes.  Must be
>> + * used at file scope.
>> + *
>> + * (This uses a temp function to wrap the asm so we can pass it the
>> + * size parameter; otherwise we wouldn't be able to.  We can't use a
>> + * "section" attribute on a normal variable because it always ends up
>> + * being @progbits, which ends up allocating space in the vmlinux
>> + * executable.)
>> + */
>> +#define RESERVE_BRK(name,sz)                        \
>> +    static void __section(.discard) __used            \
>> +    __brk_reservation_fn_##name##__(void) {                \
>> +        asm volatile (                        \
>> +            ".pushsection .brk_reservation,\"aw\",@nobits;" \
>> +            "__brk_reservation_" #name "__:"        \
>> +            " 1:.skip %c0;"                    \
>> +            " .size __brk_reservation_" #name "__, . - 1b;"    \
>> +            " .popsection"                    \
>> +            : : "i" (sz));                    \
>> +    }
>> +
>> #ifdef __i386__
>>
>> void __init i386_start_kernel(void);
>> @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char
>> *real_mode_data);
>>
>> #endif /* __i386__ */
>> #endif /* _SETUP */
>> +#else
>> +#define RESERVE_BRK(name,sz)                \
>> +    .pushsection .brk_reservation,"aw",@nobits;    \
>> +__brk_reservation_##name##__:                \
>> +1:    .skip sz;                    \
>> +    .size __brk_reservation_##name##__,.-1b;    \
>> +    .popsection
>> #endif /* __ASSEMBLY__ */
>> #endif  /*  __KERNEL__  */
>>
>> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>> index d243437..80dc05e 100644
>> --- a/arch/x86/kernel/head_32.S
>> +++ b/arch/x86/kernel/head_32.S
>> @@ -54,7 +54,7 @@
>>  *
>>  * This should be a multiple of a page.
>>  */
>> -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
>> +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>
>> /*
>>  * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
>> @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
>>
>> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>>     
>
> no user for INIT_MAP_BEYOND_END any more.
>   

There are several remaining references:

: abulafia:pts/0; grep INIT_MAP_BEYOND_END arch/x86/kernel/head_32.S 
INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp

Are you saying they're redundant and should be removed?

> could add 
> /*
>  * Build-time check on the image size:
>  */
> ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
>         "kernel image bigger than KERNEL_IMAGE_SIZE")
>
>
> for 32bit
>   

I guess we could, but it doesn't seem very urgent.

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-13 20:27       ` Jeremy Fitzhardinge
@ 2009-03-13 21:03         ` Yinghai Lu
  2009-03-13 22:45         ` H. Peter Anvin
  1 sibling, 0 replies; 12+ messages in thread
From: Yinghai Lu @ 2009-03-13 21:03 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 11785 bytes --]

Jeremy Fitzhardinge wrote:
> Yinghai Lu wrote:
>> Jeremy Fitzhardinge wrote:
>>  
>>> Yinghai Lu wrote:
>>>    
>>>> could have more explanation about the 1M size.
>>>> because initial_pg_tables will sit in it. please consider to add
>>>> something like
>>>>
>>>> in head_32.S
>>>>
>>>>  LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>>>  
>>>>  #if PTRS_PER_PMD > 1
>>>>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>>>>  #else
>>>>  PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>>>>  #endif
>>>>  ALLOCATOR_SLOP = 4
>>>>         
>>> OK, how does this look:
>>>
>>> The following changes since commit
>>> 21e8ba72daf5d7f0af33968f873499c85f96ccef:
>>>  Jeremy Fitzhardinge (1):
>>>        x86: use brk allocation for DMI
>>>
>>> are available in the git repository at:
>>>
>>>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git
>>> push/x86/brk
>>>
>>> Jeremy Fitzhardinge (1):
>>>      x86: allow extend_brk users to reserve brk space
>>>
>>> Yinghai Lu (1):
>>>      x86-32: compute initial mapping size more accurately
>>>
>>> arch/x86/include/asm/page_32_types.h |    5 +++++
>>> arch/x86/include/asm/setup.h         |   30
>>> ++++++++++++++++++++++++++++++
>>> arch/x86/kernel/head_32.S            |    4 +++-
>>> arch/x86/kernel/setup.c              |    2 ++
>>> arch/x86/kernel/vmlinux_32.lds.S     |    4 +++-
>>> arch/x86/kernel/vmlinux_64.lds.S     |    4 +++-
>>> 6 files changed, 46 insertions(+), 3 deletions(-)
>>>
>>> git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
>>> diff --git a/arch/x86/include/asm/page_32_types.h
>>> b/arch/x86/include/asm/page_32_types.h
>>> index f1e4a79..0f915ae 100644
>>> --- a/arch/x86/include/asm/page_32_types.h
>>> +++ b/arch/x86/include/asm/page_32_types.h
>>> @@ -39,6 +39,11 @@
>>> #define __VIRTUAL_MASK_SHIFT    32
>>> #endif    /* CONFIG_X86_PAE */
>>>
>>> +/*
>>> + * Kernel image size is limited to 512 MB (see in
>>> arch/x86/kernel/head_32.S)
>>> + */
>>> +#define KERNEL_IMAGE_SIZE    (512 * 1024 * 1024)
>>> +
>>> #ifndef __ASSEMBLY__
>>>
>>> /*
>>> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
>>> index 366d366..61b126b 100644
>>> --- a/arch/x86/include/asm/setup.h
>>> +++ b/arch/x86/include/asm/setup.h
>>> @@ -104,6 +104,29 @@ extern struct boot_params boot_params;
>>> extern unsigned long _brk_end;
>>> void *extend_brk(size_t size, size_t align);
>>>
>>> +/*
>>> + * Reserve space in the brk section.  The name must be unique within
>>> + * the file, and somewhat descriptive.  The size is in bytes.  Must be
>>> + * used at file scope.
>>> + *
>>> + * (This uses a temp function to wrap the asm so we can pass it the
>>> + * size parameter; otherwise we wouldn't be able to.  We can't use a
>>> + * "section" attribute on a normal variable because it always ends up
>>> + * being @progbits, which ends up allocating space in the vmlinux
>>> + * executable.)
>>> + */
>>> +#define RESERVE_BRK(name,sz)                        \
>>> +    static void __section(.discard) __used            \
>>> +    __brk_reservation_fn_##name##__(void) {                \
>>> +        asm volatile (                        \
>>> +            ".pushsection .brk_reservation,\"aw\",@nobits;" \
>>> +            "__brk_reservation_" #name "__:"        \
>>> +            " 1:.skip %c0;"                    \
>>> +            " .size __brk_reservation_" #name "__, . - 1b;"    \
>>> +            " .popsection"                    \
>>> +            : : "i" (sz));                    \
>>> +    }
>>> +
>>> #ifdef __i386__
>>>
>>> void __init i386_start_kernel(void);
>>> @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char
>>> *real_mode_data);
>>>
>>> #endif /* __i386__ */
>>> #endif /* _SETUP */
>>> +#else
>>> +#define RESERVE_BRK(name,sz)                \
>>> +    .pushsection .brk_reservation,"aw",@nobits;    \
>>> +__brk_reservation_##name##__:                \
>>> +1:    .skip sz;                    \
>>> +    .size __brk_reservation_##name##__,.-1b;    \
>>> +    .popsection
>>> #endif /* __ASSEMBLY__ */
>>> #endif  /*  __KERNEL__  */
>>>
>>> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>>> index d243437..80dc05e 100644
>>> --- a/arch/x86/kernel/head_32.S
>>> +++ b/arch/x86/kernel/head_32.S
>>> @@ -54,7 +54,7 @@
>>>  *
>>>  * This should be a multiple of a page.
>>>  */
>>> -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
>>> +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>>
>>> /*
>>>  * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
>>> @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
>>>
>>> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>>>     
>>
>> no user for INIT_MAP_BEYOND_END any more.
>>   
> 
> There are several remaining references:
> 
> : abulafia:pts/0; grep INIT_MAP_BEYOND_END arch/x86/kernel/head_32.S
> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
> * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
>      * End condition: we must map up to and including INIT_MAP_BEYOND_END
>     leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>      * End condition: we must map up to and including INIT_MAP_BEYOND_END
>     leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> 
> Are you saying they're redundant and should be removed?

please check attached ...

Impact: cleanup

Don't use ram after _end blindly for pagetables. aka init pages is before _end
put those pg table into .bss

v2: keep initial page table up to 512M only.
v4: put initial page tables just before _end

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/include/asm/page_32_types.h |    5 +++
 arch/x86/kernel/head32.c             |    3 +
 arch/x86/kernel/head_32.S            |   55 ++++++++++++++---------------------
 arch/x86/kernel/vmlinux_32.lds.S     |   11 ++++++-
 4 files changed, 40 insertions(+), 34 deletions(-)

Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -18,7 +18,8 @@ void __init i386_start_kernel(void)
 {
 	reserve_trampoline_memory();
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop),
+			 "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- *  - one bit for each possible page, but only in low memory, which means
- *     2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
  *  - enough space to map all low memory, which means
- *     (2^32/4096) / 1024 pages (worst case, non PAE)
- *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
- *  - a few pages for allocator use before the kernel pagetable has
- *     been set up
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
  *
  * Modulo rounding, each megabyte assigned here requires a kilobyte of
  * memory, which is currently unreclaimed.
  *
  * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
  */
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
 
 #if PTRS_PER_PMD > 1
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
 #else
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
 #endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
 ALLOCATOR_SLOP = 4
 
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
 
 /*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
@@ -166,10 +154,9 @@ num_subarch_entries = (. - subarch_entri
 
 /*
  * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * tables, which are located immediately beyond _end.
  * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end
  *
  * Note that the stack is not yet set up!
  */
@@ -209,14 +196,14 @@ default_entry:
 	loop 11b
 
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables.
+	 * End condition: we must map up to the end.
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end), %ebp
+	addl PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
 1:
-	movl %edi,pa(init_pg_tables_end)
+	movl %edi, pa(init_pg_tables_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -242,14 +229,14 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	addl $0x1000,%eax
 	loop 11b
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables; the +0x007 is
+	 * End condition: we must map up to end, the +0x007 is
 	 * the attribute bits
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end), %ebp
+	addl PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,pa(init_pg_tables_end)
+	movl %edi, pa(init_pg_tables_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -636,6 +623,10 @@ swapper_pg_fixmap:
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
+
+.section ".bss.extra_page_aligned","wa"
+	.align PAGE_SIZE_asm
+	.fill INIT_MAP_SIZE,1,0
 /*
  * This starts the data section.
  */
Index: linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_32.lds.S
+++ linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,10 +189,13 @@ SECTIONS
 	*(.bss)
 	. = ALIGN(4);
 	__bss_stop = .;
-  	_end = . ;
+	/* extra_page_aligned must be last one before end*/
 	/* This is where the kernel creates the early boot page tables */
 	. = ALIGN(PAGE_SIZE);
 	pg0 = . ;
+	*(.bss.extra_page_aligned)
+	. = ALIGN(8);
+	_end = . ;
   }
 
   /* Sections to be discarded */
@@ -205,6 +208,12 @@ SECTIONS
   DWARF_DEBUG
 }
 
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
 #ifdef CONFIG_KEXEC
 /* Link time checks */
 #include <asm/kexec.h>
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
 #define __VIRTUAL_MASK_SHIFT	32
 #endif	/* CONFIG_X86_PAE */
 
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+
 #ifndef __ASSEMBLY__
 
 /*


[-- Attachment #2: Attached Message --]
[-- Type: message/rfc822, Size: 7517 bytes --]

From: Yinghai Lu <yinghai@kernel.org>
To: "H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@elte.hu>,  Andrew Morton <akpm@linux-foundation.org>, Thomas Gleixner <tglx@linutronix.de>,  Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] x86: put initial_pg_tables into .bss -v4
Date: Mon, 09 Mar 2009 01:15:57 -0700
Message-ID: <49B4D03D.7030205@kernel.org>


Impact: cleanup

Don't use ram after _end blindly for pagetables. aka init pages is before _end
put those pg table into .bss

v2: keep initial page table up to 512M only.
v4: put initial page tables just before _end

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/include/asm/page_32_types.h |    5 +++
 arch/x86/kernel/head32.c             |    3 +
 arch/x86/kernel/head_32.S            |   55 ++++++++++++++---------------------
 arch/x86/kernel/vmlinux_32.lds.S     |   11 ++++++-
 4 files changed, 40 insertions(+), 34 deletions(-)

Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -18,7 +18,8 @@ void __init i386_start_kernel(void)
 {
 	reserve_trampoline_memory();
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop),
+			 "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- *  - one bit for each possible page, but only in low memory, which means
- *     2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
  *  - enough space to map all low memory, which means
- *     (2^32/4096) / 1024 pages (worst case, non PAE)
- *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
- *  - a few pages for allocator use before the kernel pagetable has
- *     been set up
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
  *
  * Modulo rounding, each megabyte assigned here requires a kilobyte of
  * memory, which is currently unreclaimed.
  *
  * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
  */
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
 
 #if PTRS_PER_PMD > 1
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
 #else
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
 #endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
 ALLOCATOR_SLOP = 4
 
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
 
 /*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
@@ -166,10 +154,9 @@ num_subarch_entries = (. - subarch_entri
 
 /*
  * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * tables, which are located immediately beyond _end.
  * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end
  *
  * Note that the stack is not yet set up!
  */
@@ -209,14 +196,14 @@ default_entry:
 	loop 11b
 
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables.
+	 * End condition: we must map up to the end.
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end), %ebp
+	addl PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
 1:
-	movl %edi,pa(init_pg_tables_end)
+	movl %edi, pa(init_pg_tables_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -242,14 +229,14 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	addl $0x1000,%eax
 	loop 11b
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables; the +0x007 is
+	 * End condition: we must map up to end, the +0x007 is
 	 * the attribute bits
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end), %ebp
+	addl PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,pa(init_pg_tables_end)
+	movl %edi, pa(init_pg_tables_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -636,6 +623,10 @@ swapper_pg_fixmap:
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
+
+.section ".bss.extra_page_aligned","wa"
+	.align PAGE_SIZE_asm
+	.fill INIT_MAP_SIZE,1,0
 /*
  * This starts the data section.
  */
Index: linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_32.lds.S
+++ linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,10 +189,13 @@ SECTIONS
 	*(.bss)
 	. = ALIGN(4);
 	__bss_stop = .;
-  	_end = . ;
+	/* extra_page_aligned must be last one before end*/
 	/* This is where the kernel creates the early boot page tables */
 	. = ALIGN(PAGE_SIZE);
 	pg0 = . ;
+	*(.bss.extra_page_aligned)
+	. = ALIGN(8);
+	_end = . ;
   }
 
   /* Sections to be discarded */
@@ -205,6 +208,12 @@ SECTIONS
   DWARF_DEBUG
 }
 
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
 #ifdef CONFIG_KEXEC
 /* Link time checks */
 #include <asm/kexec.h>
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
 #define __VIRTUAL_MASK_SHIFT	32
 #endif	/* CONFIG_X86_PAE */
 
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+
 #ifndef __ASSEMBLY__
 
 /*


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-13 20:27       ` Jeremy Fitzhardinge
  2009-03-13 21:03         ` Yinghai Lu
@ 2009-03-13 22:45         ` H. Peter Anvin
  2009-03-13 22:59           ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 12+ messages in thread
From: H. Peter Anvin @ 2009-03-13 22:45 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Yinghai Lu, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Jeremy Fitzhardinge wrote:
>>>
>>> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>>>     
>>
>> no user for INIT_MAP_BEYOND_END any more.
>>   
> 
> There are several remaining references:
> 
> : abulafia:pts/0; grep INIT_MAP_BEYOND_END arch/x86/kernel/head_32.S 
> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + 
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
> * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
>      * End condition: we must map up to and including INIT_MAP_BEYOND_END
>     leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>      * End condition: we must map up to and including INIT_MAP_BEYOND_END
>     leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> 
> Are you saying they're redundant and should be removed?
> 

Those references are just the actual reservation of memory.  If all the 
users of that memory are converted to either brk or bss, 
INIT_MAP_BEYOND_END should be removed.  If all the users of that memory 
aren't converted to brk or bss, we should do so.

	-hpa

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-13 22:45         ` H. Peter Anvin
@ 2009-03-13 22:59           ` Jeremy Fitzhardinge
  2009-03-13 23:20             ` Yinghai Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-13 22:59 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

H. Peter Anvin wrote:
> Those references are just the actual reservation of memory.  If all 
> the users of that memory are converted to either brk or bss, 
> INIT_MAP_BEYOND_END should be removed.  If all the users of that 
> memory aren't converted to brk to bss, we should do so.

I just adapted Yinghai's patch and added it to push/x86/brk.

    J


The following changes since commit 8131667360004a0b74e4dcadfee8a18d4e2b074f:
  Jeremy Fitzhardinge (1):
        x86: allow extend_brk users to reserve brk space

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk

Yinghai Lu (1):
      x86: put initial_pg_tables into .bss -v4

 arch/x86/kernel/head_32.S        |   47 ++++++++++++++-----------------------
 arch/x86/kernel/vmlinux_32.lds.S |    6 +++++
 2 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 80dc05e..ad7dbbb 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- *  - one bit for each possible page, but only in low memory, which means
- *     2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
  *  - enough space to map all low memory, which means
- *     (2^32/4096) / 1024 pages (worst case, non PAE)
- *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
- *  - a few pages for allocator use before the kernel pagetable has
- *     been set up
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
  *
  * Modulo rounding, each megabyte assigned here requires a kilobyte of
  * memory, which is currently unreclaimed.
  *
  * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
  */
 LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
 
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
-
 #if PTRS_PER_PMD > 1
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
 #else
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
 #endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
 ALLOCATOR_SLOP = 4
 
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
 
 RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
 
@@ -168,10 +156,10 @@ num_subarch_entries = (. - subarch_entries) / 4
 
 /*
  * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
+ * tables, which are located immediately beyond __brk_base.  The variable
  * _brk_end is set up to point to the first "safe" location.
  * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end.
  *
  * Note that the stack is not yet set up!
  */
@@ -210,10 +198,9 @@ default_entry:
 	loop 11b
 
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables.
+	 * End condition: we must map up to the end.
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end) + PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
 1:
@@ -243,11 +230,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	addl $0x1000,%eax
 	loop 11b
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables; the +0x007 is
-	 * the attribute bits
+	 * End condition: we must map up to end
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end) + PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
 	addl $__PAGE_OFFSET, %edi
@@ -638,6 +623,10 @@ swapper_pg_fixmap:
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
+
+.section ".bss.extra_page_aligned","wa"
+	.align PAGE_SIZE_asm
+	.fill INIT_MAP_SIZE,1,0
 /*
  * This starts the data section.
  */
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 4005279..c318dee 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -210,6 +210,12 @@ SECTIONS
   DWARF_DEBUG
 }
 
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
 #ifdef CONFIG_KEXEC
 /* Link time checks */
 #include <asm/kexec.h>


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-13 22:59           ` Jeremy Fitzhardinge
@ 2009-03-13 23:20             ` Yinghai Lu
  2009-03-14  0:23               ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 12+ messages in thread
From: Yinghai Lu @ 2009-03-13 23:20 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> Those references are just the actual reservation of memory.  If all
>> the users of that memory are converted to either brk or bss,
>> INIT_MAP_BEYOND_END should be removed.  If all the users of that
>> memory aren't converted to brk to bss, we should do so.
> 
> I just added an adapted Yinghai's patch and added it to push/x86/brk.
> 
>    J
> 
> 
> The following changes since commit
> 8131667360004a0b74e4dcadfee8a18d4e2b074f:
>  Jeremy Fitzhardinge (1):
>        x86: allow extend_brk users to reserve brk space
> 
> are available in the git repository at:
> 
>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
> 
> Yinghai Lu (1):
>      x86: put initial_pg_tables into .bss -v4
> 
> arch/x86/kernel/head_32.S        |   47
> ++++++++++++++-----------------------
> arch/x86/kernel/vmlinux_32.lds.S |    6 +++++
> 2 files changed, 24 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index 80dc05e..ad7dbbb 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -38,42 +38,30 @@
> #define X86_VENDOR_ID    new_cpu_data+CPUINFO_x86_vendor_id
> 
> /*
> - * This is how much memory *in addition to the memory covered up to
> - * and including _end* we need mapped initially.
> - * We need:
> - *  - one bit for each possible page, but only in low memory, which means
> - *     2^32/4096/8 = 128K worst case (4G/4G split.)
> + * This is how much memory for page table to and including _end
> + * we need mapped initially.
>  *  - enough space to map all low memory, which means
> - *     (2^32/4096) / 1024 pages (worst case, non PAE)
> - *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
> - *  - a few pages for allocator use before the kernel pagetable has
> - *     been set up
> + *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
> + *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
>  *
>  * Modulo rounding, each megabyte assigned here requires a kilobyte of
>  * memory, which is currently unreclaimed.
>  *
>  * This should be a multiple of a page.
> + *
> + * KERNEL_IMAGE_SIZE should be greater than pa(_end)
> + * and small than max_low_pfn, otherwise will waste some page table
> entries
>  */
> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
> 
> -/*
> - * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
> - * pagetables from above the 16MB DMA limit, so we'll have to set
> - * up pagetables 16MB more (worst-case):
> - */
> -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
> -LOW_PAGES = LOW_PAGES + 0x1000000
> -#endif
> -
> #if PTRS_PER_PMD > 1
> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
> #else
> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
> #endif
> -BOOTBITMAP_SIZE = LOW_PAGES / 8
> ALLOCATOR_SLOP = 4
> 
> -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
> +INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
> 
> RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)

use INIT_MAP_SIZE in RESERVE_BRK directly?

> 
> @@ -168,10 +156,10 @@ num_subarch_entries = (. - subarch_entries) / 4
> 
> /*
>  * Initialize page tables.  This creates a PDE and a set of page
> - * tables, which are located immediately beyond _end.  The variable
> + * tables, which are located immediately beyond __brk_base.  The variable
>  * _brk_end is set up to point to the first "safe" location.
>  * Mappings are created both at virtual address 0 (identity mapping)
> - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
> + * and PAGE_OFFSET for up to _end.
>  *
>  * Note that the stack is not yet set up!
>  */
> @@ -210,10 +198,9 @@ default_entry:
>     loop 11b
> 
>     /*
> -     * End condition: we must map up to and including INIT_MAP_BEYOND_END
> -     * bytes beyond the end of our own page tables.
> +     * End condition: we must map up to the end.
>      */
> -    leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> +    movl $pa(_end) + PTE_IDENT_ATTR, %ebp
>     cmpl %ebp,%eax
>     jb 10b
> 1:
> @@ -243,11 +230,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
>     addl $0x1000,%eax
>     loop 11b
>     /*
> -     * End condition: we must map up to and including INIT_MAP_BEYOND_END
> -     * bytes beyond the end of our own page tables; the +0x007 is
> -     * the attribute bits
> +     * End condition: we must map up to end
>      */
> -    leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> +    movl $pa(_end) + PTE_IDENT_ATTR, %ebp
>     cmpl %ebp,%eax
>     jb 10b
>     addl $__PAGE_OFFSET, %edi
> @@ -638,6 +623,10 @@ swapper_pg_fixmap:
>     .fill 1024,4,0
> ENTRY(empty_zero_page)
>     .fill 4096,1,0
> +
> +.section ".bss.extra_page_aligned","wa"
> +    .align PAGE_SIZE_asm
> +    .fill INIT_MAP_SIZE,1,0

this 4 lines are not needed. you already had RESERVE_BRK for it.

YH

> /*
>  * This starts the data section.
>  */
> diff --git a/arch/x86/kernel/vmlinux_32.lds.S
> b/arch/x86/kernel/vmlinux_32.lds.S
> index 4005279..c318dee 100644
> --- a/arch/x86/kernel/vmlinux_32.lds.S
> +++ b/arch/x86/kernel/vmlinux_32.lds.S
> @@ -210,6 +210,12 @@ SECTIONS
>   DWARF_DEBUG
> }
> 
> +/*
> + * Build-time check on the image size:
> + */
> +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
> +    "kernel image bigger than KERNEL_IMAGE_SIZE")
> +
> #ifdef CONFIG_KEXEC
> /* Link time checks */
> #include <asm/kexec.h>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86: add brk allocator for very early allocations
  2009-03-13 23:20             ` Yinghai Lu
@ 2009-03-14  0:23               ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-14  0:23 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
	Eric W. Biederman, Linux Kernel Mailing List

Yinghai Lu wrote:
> Jeremy Fitzhardinge wrote:
>   
>> H. Peter Anvin wrote:
>>     
>>> Those references are just the actual reservation of memory.  If all
>>> the users of that memory are converted to either brk or bss,
>>> INIT_MAP_BEYOND_END should be removed.  If all the users of that
>>> memory aren't converted to brk to bss, we should do so.
>>>       
>> I just added an adapted Yinghai's patch and added it to push/x86/brk.
>>
>>    J
>>
>>
>> The following changes since commit
>> 8131667360004a0b74e4dcadfee8a18d4e2b074f:
>>  Jeremy Fitzhardinge (1):
>>        x86: allow extend_brk users to reserve brk space
>>
>> are available in the git repository at:
>>
>>  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>>
>> Yinghai Lu (1):
>>      x86: put initial_pg_tables into .bss -v4
>>
>> arch/x86/kernel/head_32.S        |   47
>> ++++++++++++++-----------------------
>> arch/x86/kernel/vmlinux_32.lds.S |    6 +++++
>> 2 files changed, 24 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>> index 80dc05e..ad7dbbb 100644
>> --- a/arch/x86/kernel/head_32.S
>> +++ b/arch/x86/kernel/head_32.S
>> @@ -38,42 +38,30 @@
>> #define X86_VENDOR_ID    new_cpu_data+CPUINFO_x86_vendor_id
>>
>> /*
>> - * This is how much memory *in addition to the memory covered up to
>> - * and including _end* we need mapped initially.
>> - * We need:
>> - *  - one bit for each possible page, but only in low memory, which means
>> - *     2^32/4096/8 = 128K worst case (4G/4G split.)
>> + * This is how much memory for page table to and including _end
>> + * we need mapped initially.
>>  *  - enough space to map all low memory, which means
>> - *     (2^32/4096) / 1024 pages (worst case, non PAE)
>> - *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
>> - *  - a few pages for allocator use before the kernel pagetable has
>> - *     been set up
>> + *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
>> + *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
>>  *
>>  * Modulo rounding, each megabyte assigned here requires a kilobyte of
>>  * memory, which is currently unreclaimed.
>>  *
>>  * This should be a multiple of a page.
>> + *
>> + * KERNEL_IMAGE_SIZE should be greater than pa(_end)
>> + * and small than max_low_pfn, otherwise will waste some page table
>> entries
>>  */
>> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>
>> -/*
>> - * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
>> - * pagetables from above the 16MB DMA limit, so we'll have to set
>> - * up pagetables 16MB more (worst-case):
>> - */
>> -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
>> -LOW_PAGES = LOW_PAGES + 0x1000000
>> -#endif
>> -
>> #if PTRS_PER_PMD > 1
>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>> #else
>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>> #endif
>> -BOOTBITMAP_SIZE = LOW_PAGES / 8
>> ALLOCATOR_SLOP = 4
>>
>> -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>> +INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
>>
>> RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
>>     
>
> use INIT_MAP_SIZE in RESERVE_BRK directly?
>   
OK.

>   
>> @@ -168,10 +156,10 @@ num_subarch_entries = (. - subarch_entries) / 4
>>
>> /*
>>  * Initialize page tables.  This creates a PDE and a set of page
>> - * tables, which are located immediately beyond _end.  The variable
>> + * tables, which are located immediately beyond __brk_base.  The variable
>>  * _brk_end is set up to point to the first "safe" location.
>>  * Mappings are created both at virtual address 0 (identity mapping)
>> - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
>> + * and PAGE_OFFSET for up to _end.
>>  *
>>  * Note that the stack is not yet set up!
>>  */
>> @@ -210,10 +198,9 @@ default_entry:
>>     loop 11b
>>
>>     /*
>> -     * End condition: we must map up to and including INIT_MAP_BEYOND_END
>> -     * bytes beyond the end of our own page tables.
>> +     * End condition: we must map up to the end.
>>      */
>> -    leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>> +    movl $pa(_end) + PTE_IDENT_ATTR, %ebp
>>     cmpl %ebp,%eax
>>     jb 10b
>> 1:
>> @@ -243,11 +230,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
>>     addl $0x1000,%eax
>>     loop 11b
>>     /*
>> -     * End condition: we must map up to and including INIT_MAP_BEYOND_END
>> -     * bytes beyond the end of our own page tables; the +0x007 is
>> -     * the attribute bits
>> +     * End condition: we must map up to end
>>      */
>> -    leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>> +    movl $pa(_end) + PTE_IDENT_ATTR, %ebp
>>     cmpl %ebp,%eax
>>     jb 10b
>>     addl $__PAGE_OFFSET, %edi
>> @@ -638,6 +623,10 @@ swapper_pg_fixmap:
>>     .fill 1024,4,0
>> ENTRY(empty_zero_page)
>>     .fill 4096,1,0
>> +
>> +.section ".bss.extra_page_aligned","wa"
>> +    .align PAGE_SIZE_asm
>> +    .fill INIT_MAP_SIZE,1,0
>>     
>
> this 4 lines are not needed. you already had RESERVE_BRK for it.
>
>   

OK.

I've updated the changeset and repushed.

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2009-03-14  0:53 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-11 16:59 [GIT PULL] x86: add brk allocator for very early allocations Jeremy Fitzhardinge
2009-03-11 18:19 ` Yinghai Lu
2009-03-12 23:59   ` Jeremy Fitzhardinge
2009-03-13  0:44     ` Yinghai Lu
2009-03-13 20:27       ` Jeremy Fitzhardinge
2009-03-13 21:03         ` Yinghai Lu
2009-03-13 22:45         ` H. Peter Anvin
2009-03-13 22:59           ` Jeremy Fitzhardinge
2009-03-13 23:20             ` Yinghai Lu
2009-03-14  0:23               ` Jeremy Fitzhardinge
2009-03-11 19:20 ` Eric W. Biederman
2009-03-11 23:53   ` Jeremy Fitzhardinge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.