* [GIT PULL] x86: add brk allocator for very early allocations
@ 2009-03-11 16:59 Jeremy Fitzhardinge
2009-03-11 18:19 ` Yinghai Lu
2009-03-11 19:20 ` Eric W. Biederman
0 siblings, 2 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-11 16:59 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Ingo Molnar, the arch/x86 maintainers, Eric W. Biederman,
Yinghai Lu, Linux Kernel Mailing List
Aggregate patch below.
The following changes since commit 11f5585820ae805c48f41c09bc260d0e51744792:
Ingo Molnar (1):
Merge branch 'tracing/ftrace'
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
Jeremy Fitzhardinge (4):
x86: make section delimiter symbols part of their section
x86: add brk allocation for very, very early allocations
x86-32: use brk segment for allocating initial kernel pagetable
x86: use brk allocation for DMI
arch/x86/include/asm/dmi.h | 14 +-----
arch/x86/include/asm/pgtable_32.h | 3 -
arch/x86/include/asm/sections.h | 7 +++
arch/x86/include/asm/setup.h | 7 ++-
arch/x86/kernel/head32.c | 5 +--
arch/x86/kernel/head64.c | 2 +-
arch/x86/kernel/head_32.S | 14 +++---
arch/x86/kernel/setup.c | 51 ++++++++++++++-------
arch/x86/kernel/vmlinux_32.lds.S | 9 +++-
arch/x86/kernel/vmlinux_64.lds.S | 90 ++++++++++++++++++++----------------
arch/x86/lguest/boot.c | 8 ---
arch/x86/mm/pageattr.c | 5 +-
arch/x86/xen/mmu.c | 6 +-
13 files changed, 118 insertions(+), 103 deletions(-)
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index bc68212..aa32f7e 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -2,21 +2,11 @@
#define _ASM_X86_DMI_H
#include <asm/io.h>
+#include <asm/setup.h>
-#define DMI_MAX_DATA 2048
-
-extern int dmi_alloc_index;
-extern char dmi_alloc_data[DMI_MAX_DATA];
-
-/* This is so early that there is no good way to allocate dynamic memory.
- Allocate data in an BSS array. */
static inline void *dmi_alloc(unsigned len)
{
- int idx = dmi_alloc_index;
- if ((dmi_alloc_index + len) > DMI_MAX_DATA)
- return NULL;
- dmi_alloc_index += len;
- return dmi_alloc_data + idx;
+ return extend_brk(len, sizeof(int));
}
/* Use early IO mappings for DMI because it's initialized early */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 97612fc..31bd120 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
*/
#undef TEST_ACCESS_OK
-/* The boot page tables (all created as a single array) */
-extern unsigned long pg0[];
-
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 2b8c516..1b7ee5d 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -1 +1,8 @@
+#ifndef _ASM_X86_SECTIONS_H
+#define _ASM_X86_SECTIONS_H
+
#include <asm-generic/sections.h>
+
+extern char __brk_base[], __brk_limit[];
+
+#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 05c6f6b..366d366 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,14 +100,15 @@ extern struct boot_params boot_params;
*/
#define LOWMEMSIZE() (0x9f000)
+/* exceedingly early brk-like allocator */
+extern unsigned long _brk_end;
+void *extend_brk(size_t size, size_t align);
+
#ifdef __i386__
void __init i386_start_kernel(void);
extern void probe_roms(void);
-extern unsigned long init_pg_tables_start;
-extern unsigned long init_pg_tables_end;
-
#else
void __init x86_64_start_kernel(char *real_mode);
void __init x86_64_start_reservations(char *real_mode_data);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index ac108d1..3f8579f 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
{
reserve_trampoline_memory();
- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
@@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
- reserve_early(init_pg_tables_start, init_pg_tables_end,
- "INIT_PG_TABLE");
-
reserve_ebda_region();
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f5b2722..70eaa85 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
reserve_trampoline_memory();
- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6219259..d243437 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -167,7 +167,7 @@ num_subarch_entries = (. - subarch_entries) / 4
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond _end. The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * _brk_end is set up to point to the first "safe" location.
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
*
@@ -190,8 +190,7 @@ default_entry:
xorl %ebx,%ebx /* %ebx is kept at zero */
- movl $pa(pg0), %edi
- movl %edi, pa(init_pg_tables_start)
+ movl $pa(__brk_base), %edi
movl $pa(swapper_pg_pmd), %edx
movl $PTE_IDENT_ATTR, %eax
10:
@@ -216,7 +215,8 @@ default_entry:
cmpl %ebp,%eax
jb 10b
1:
- movl %edi,pa(init_pg_tables_end)
+ addl $__PAGE_OFFSET, %edi
+ movl %edi, pa(_brk_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@@ -227,8 +227,7 @@ default_entry:
page_pde_offset = (__PAGE_OFFSET >> 20);
- movl $pa(pg0), %edi
- movl %edi, pa(init_pg_tables_start)
+ movl $pa(__brk_base), %edi
movl $pa(swapper_pg_dir), %edx
movl $PTE_IDENT_ATTR, %eax
10:
@@ -249,7 +248,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
- movl %edi,pa(init_pg_tables_end)
+ addl $__PAGE_OFFSET, %edi
+ movl %edi, pa(_brk_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ce9e888..b344908 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -114,6 +114,9 @@
unsigned int boot_cpu_id __read_mostly;
+static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
+unsigned long _brk_end = (unsigned long)__brk_base;
+
#ifdef CONFIG_X86_64
int default_cpu_present_to_apicid(int mps_cpu)
{
@@ -158,12 +161,6 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
-/* This value is set up by the early boot code to point to the value
- immediately after the boot time page tables. It contains a *physical*
- address, and must not be in the .bss segment! */
-unsigned long init_pg_tables_start __initdata = ~0UL;
-unsigned long init_pg_tables_end __initdata = ~0UL;
-
static struct resource video_ram_resource = {
.name = "Video RAM area",
.start = 0xa0000,
@@ -219,12 +216,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
int bootloader_type;
/*
- * Early DMI memory
- */
-int dmi_alloc_index;
-char dmi_alloc_data[DMI_MAX_DATA];
-
-/*
* Setup options
*/
struct screen_info screen_info;
@@ -337,6 +328,34 @@ static void __init relocate_initrd(void)
}
#endif
+void * __init extend_brk(size_t size, size_t align)
+{
+ size_t mask = align - 1;
+ void *ret;
+
+ BUG_ON(_brk_start == 0);
+ BUG_ON(align & mask);
+
+ _brk_end = (_brk_end + mask) & ~mask;
+ BUG_ON((char *)(_brk_end + size) > __brk_limit);
+
+ ret = (void *)_brk_end;
+ _brk_end += size;
+
+ memset(ret, 0, size);
+
+ return ret;
+}
+
+static void __init reserve_brk(void)
+{
+ if (_brk_end > _brk_start)
+ reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
+
+ /* Mark brk area as locked down and no longer taking any new allocations */
+ _brk_start = 0;
+}
+
static void __init reserve_initrd(void)
{
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -717,11 +736,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
-#ifdef CONFIG_X86_32
- init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
-#else
- init_mm.brk = (unsigned long) &_end;
-#endif
+ init_mm.brk = _brk_end;
code_resource.start = virt_to_phys(_text);
code_resource.end = virt_to_phys(_etext)-1;
@@ -842,6 +857,8 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check();
#endif
+ reserve_brk();
+
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 0d86096..1063fbe 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,10 +189,13 @@ SECTIONS
*(.bss)
. = ALIGN(4);
__bss_stop = .;
- _end = . ;
- /* This is where the kernel creates the early boot page tables */
+
. = ALIGN(PAGE_SIZE);
- pg0 = . ;
+ __brk_base = . ;
+ . += 1024 * 1024 ;
+ __brk_limit = . ;
+
+ _end = . ;
}
/* Sections to be discarded */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6..b8b83e4 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -29,8 +29,8 @@ SECTIONS
{
. = __START_KERNEL;
phys_startup_64 = startup_64 - LOAD_OFFSET;
- _text = .; /* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
+ _text = .; /* Text and read-only data */
/* First the code that has to be first for bootstrapping */
*(.text.head)
_stext = .;
@@ -61,13 +61,13 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
CONSTRUCTORS
+ _edata = .; /* End of data section */
} :data
- _edata = .; /* End of data section */
- . = ALIGN(PAGE_SIZE);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+ . = ALIGN(PAGE_SIZE);
+ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
*(.data.cacheline_aligned)
}
. = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
@@ -125,29 +125,29 @@ SECTIONS
#undef VVIRT_OFFSET
#undef VVIRT
- . = ALIGN(THREAD_SIZE); /* init_task */
.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ . = ALIGN(THREAD_SIZE); /* init_task */
*(.data.init_task)
}:data.init
- . = ALIGN(PAGE_SIZE);
.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+ . = ALIGN(PAGE_SIZE);
*(.data.page_aligned)
}
- /* might get freed after init */
- . = ALIGN(PAGE_SIZE);
- __smp_alt_begin = .;
- __smp_locks = .;
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+ /* might get freed after init */
+ . = ALIGN(PAGE_SIZE);
+ __smp_alt_begin = .;
+ __smp_locks = .;
*(.smp_locks)
+ __smp_locks_end = .;
+ . = ALIGN(PAGE_SIZE);
+ __smp_alt_end = .;
}
- __smp_locks_end = .;
- . = ALIGN(PAGE_SIZE);
- __smp_alt_end = .;
. = ALIGN(PAGE_SIZE); /* Init code and data */
- __init_begin = .;
+ __init_begin = .; /* paired with __init_end */
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
INIT_TEXT
@@ -159,40 +159,42 @@ SECTIONS
__initdata_end = .;
}
- . = ALIGN(16);
- __setup_start = .;
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
- __setup_end = .;
- __initcall_start = .;
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+ . = ALIGN(16);
+ __setup_start = .;
+ *(.init.setup)
+ __setup_end = .;
+ }
.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+ __initcall_start = .;
INITCALLS
+ __initcall_end = .;
}
- __initcall_end = .;
- __con_initcall_start = .;
.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ __con_initcall_start = .;
*(.con_initcall.init)
+ __con_initcall_end = .;
}
- __con_initcall_end = .;
- __x86_cpu_dev_start = .;
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+ __x86_cpu_dev_start = .;
*(.x86_cpu_dev.init)
+ __x86_cpu_dev_end = .;
}
- __x86_cpu_dev_end = .;
SECURITY_INIT
. = ALIGN(8);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
- __parainstructions = .;
+ __parainstructions = .;
*(.parainstructions)
- __parainstructions_end = .;
+ __parainstructions_end = .;
}
- . = ALIGN(8);
- __alt_instructions = .;
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+ . = ALIGN(8);
+ __alt_instructions = .;
*(.altinstructions)
+ __alt_instructions_end = .;
}
- __alt_instructions_end = .;
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
*(.altinstr_replacement)
}
@@ -207,9 +209,11 @@ SECTIONS
#ifdef CONFIG_BLK_DEV_INITRD
. = ALIGN(PAGE_SIZE);
- __initramfs_start = .;
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
- __initramfs_end = .;
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+ __initramfs_start = .;
+ *(.init.ramfs)
+ __initramfs_end = .;
+ }
#endif
#ifdef CONFIG_SMP
@@ -229,20 +233,26 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__init_end = .;
- . = ALIGN(PAGE_SIZE);
- __nosave_begin = .;
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
- *(.data.nosave)
+ . = ALIGN(PAGE_SIZE);
+ __nosave_begin = .;
+ *(.data.nosave)
+ . = ALIGN(PAGE_SIZE);
+ __nosave_end = .;
} :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
- . = ALIGN(PAGE_SIZE);
- __nosave_end = .;
- __bss_start = .; /* BSS */
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+ . = ALIGN(PAGE_SIZE);
+ __bss_start = .; /* BSS */
*(.bss.page_aligned)
*(.bss)
- }
- __bss_stop = .;
+ __bss_stop = .;
+
+ . = ALIGN(PAGE_SIZE);
+ __brk_base = . ;
+ . += 1024 * 1024 ;
+ __brk_limit = . ;
+ }
_end = . ;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9fe4dda..90e44a1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1058,14 +1058,6 @@ __init void lguest_init(void)
* lguest_init() where the rest of the fairly chaotic boot setup
* occurs. */
- /* The native boot code sets up initial page tables immediately after
- * the kernel itself, and sets init_pg_tables_end so they're not
- * clobbered. The Launcher places our initial pagetables somewhere at
- * the top of our physical memory, so we don't need extra space: set
- * init_pg_tables_end to the end of the kernel. */
- init_pg_tables_start = __pa(pg0);
- init_pg_tables_end = __pa(pg0);
-
/* As described in head_32.S, we map the first 128M of memory. */
max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4629a87..8eb4eaa 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/setup.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
@@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
static inline unsigned long highmap_end_pfn(void)
{
- return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+ return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
}
#endif
@@ -700,7 +701,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the high
* mapping already:
*/
- if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
+ if (within(vaddr, (unsigned long) _text, _brk_end))
return 0;
/*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4..72f6a76 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
{
pmd_t *kernel_pmd;
- init_pg_tables_start = __pa(pgd);
- init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
- max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
+ xen_start_info->nr_pt_frames * PAGE_SIZE +
+ 512*1024);
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-11 16:59 [GIT PULL] x86: add brk allocator for very early allocations Jeremy Fitzhardinge
@ 2009-03-11 18:19 ` Yinghai Lu
2009-03-12 23:59 ` Jeremy Fitzhardinge
2009-03-11 19:20 ` Eric W. Biederman
1 sibling, 1 reply; 12+ messages in thread
From: Yinghai Lu @ 2009-03-11 18:19 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Jeremy Fitzhardinge wrote:
> Aggregate patch below.
>
> The following changes since commit
> 11f5585820ae805c48f41c09bc260d0e51744792:
> Ingo Molnar (1):
> Merge branch 'tracing/ftrace'
>
> are available in the git repository at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>
> Jeremy Fitzhardinge (4):
> x86: make section delimiter symbols part of their section
> x86: add brk allocation for very, very early allocations
> x86-32: use brk segment for allocating initial kernel pagetable
> x86: use brk allocation for DMI
>
> arch/x86/include/asm/dmi.h | 14 +-----
> arch/x86/include/asm/pgtable_32.h | 3 -
> arch/x86/include/asm/sections.h | 7 +++
> arch/x86/include/asm/setup.h | 7 ++-
> arch/x86/kernel/head32.c | 5 +--
> arch/x86/kernel/head64.c | 2 +-
> arch/x86/kernel/head_32.S | 14 +++---
> arch/x86/kernel/setup.c | 51 ++++++++++++++-------
> arch/x86/kernel/vmlinux_32.lds.S | 9 +++-
> arch/x86/kernel/vmlinux_64.lds.S | 90
> ++++++++++++++++++++----------------
> arch/x86/lguest/boot.c | 8 ---
> arch/x86/mm/pageattr.c | 5 +-
> arch/x86/xen/mmu.c | 6 +-
> 13 files changed, 118 insertions(+), 103 deletions(-)
>
> diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
> index bc68212..aa32f7e 100644
> --- a/arch/x86/include/asm/dmi.h
> +++ b/arch/x86/include/asm/dmi.h
> @@ -2,21 +2,11 @@
> #define _ASM_X86_DMI_H
>
> #include <asm/io.h>
> +#include <asm/setup.h>
>
> -#define DMI_MAX_DATA 2048
> -
> -extern int dmi_alloc_index;
> -extern char dmi_alloc_data[DMI_MAX_DATA];
> -
> -/* This is so early that there is no good way to allocate dynamic memory.
> - Allocate data in an BSS array. */
> static inline void *dmi_alloc(unsigned len)
> {
> - int idx = dmi_alloc_index;
> - if ((dmi_alloc_index + len) > DMI_MAX_DATA)
> - return NULL;
> - dmi_alloc_index += len;
> - return dmi_alloc_data + idx;
> + return extend_brk(len, sizeof(int));
> }
>
> /* Use early IO mappings for DMI because it's initialized early */
> diff --git a/arch/x86/include/asm/pgtable_32.h
> b/arch/x86/include/asm/pgtable_32.h
> index 97612fc..31bd120 100644
> --- a/arch/x86/include/asm/pgtable_32.h
> +++ b/arch/x86/include/asm/pgtable_32.h
> @@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long,
> pgprot_t);
> */
> #undef TEST_ACCESS_OK
>
> -/* The boot page tables (all created as a single array) */
> -extern unsigned long pg0[];
> -
> #ifdef CONFIG_X86_PAE
> # include <asm/pgtable-3level.h>
> #else
> diff --git a/arch/x86/include/asm/sections.h
> b/arch/x86/include/asm/sections.h
> index 2b8c516..1b7ee5d 100644
> --- a/arch/x86/include/asm/sections.h
> +++ b/arch/x86/include/asm/sections.h
> @@ -1 +1,8 @@
> +#ifndef _ASM_X86_SECTIONS_H
> +#define _ASM_X86_SECTIONS_H
> +
> #include <asm-generic/sections.h>
> +
> +extern char __brk_base[], __brk_limit[];
> +
> +#endif /* _ASM_X86_SECTIONS_H */
> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
> index 05c6f6b..366d366 100644
> --- a/arch/x86/include/asm/setup.h
> +++ b/arch/x86/include/asm/setup.h
> @@ -100,14 +100,15 @@ extern struct boot_params boot_params;
> */
> #define LOWMEMSIZE() (0x9f000)
>
> +/* exceedingly early brk-like allocator */
> +extern unsigned long _brk_end;
> +void *extend_brk(size_t size, size_t align);
> +
> #ifdef __i386__
>
> void __init i386_start_kernel(void);
> extern void probe_roms(void);
>
> -extern unsigned long init_pg_tables_start;
> -extern unsigned long init_pg_tables_end;
> -
> #else
> void __init x86_64_start_kernel(char *real_mode);
> void __init x86_64_start_reservations(char *real_mode_data);
> diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
> index ac108d1..3f8579f 100644
> --- a/arch/x86/kernel/head32.c
> +++ b/arch/x86/kernel/head32.c
> @@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
> {
> reserve_trampoline_memory();
>
> - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
> + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT
> DATA BSS");
>
> #ifdef CONFIG_BLK_DEV_INITRD
> /* Reserve INITRD */
> @@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
> reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
> }
> #endif
> - reserve_early(init_pg_tables_start, init_pg_tables_end,
> - "INIT_PG_TABLE");
> -
> reserve_ebda_region();
>
> /*
> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> index f5b2722..70eaa85 100644
> --- a/arch/x86/kernel/head64.c
> +++ b/arch/x86/kernel/head64.c
> @@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char
> *real_mode_data)
>
> reserve_trampoline_memory();
>
> - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
> + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT
> DATA BSS");
>
> #ifdef CONFIG_BLK_DEV_INITRD
> /* Reserve INITRD */
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index 6219259..d243437 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -167,7 +167,7 @@ num_subarch_entries = (. - subarch_entries) / 4
> /*
> * Initialize page tables. This creates a PDE and a set of page
> * tables, which are located immediately beyond _end. The variable
> - * init_pg_tables_end is set up to point to the first "safe" location.
> + * _brk_end is set up to point to the first "safe" location.
> * Mappings are created both at virtual address 0 (identity mapping)
> * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
> *
> @@ -190,8 +190,7 @@ default_entry:
>
> xorl %ebx,%ebx /* %ebx is kept at zero */
>
> - movl $pa(pg0), %edi
> - movl %edi, pa(init_pg_tables_start)
> + movl $pa(__brk_base), %edi
> movl $pa(swapper_pg_pmd), %edx
> movl $PTE_IDENT_ATTR, %eax
> 10:
> @@ -216,7 +215,8 @@ default_entry:
> cmpl %ebp,%eax
> jb 10b
> 1:
> - movl %edi,pa(init_pg_tables_end)
> + addl $__PAGE_OFFSET, %edi
> + movl %edi, pa(_brk_end)
> shrl $12, %eax
> movl %eax, pa(max_pfn_mapped)
>
> @@ -227,8 +227,7 @@ default_entry:
>
> page_pde_offset = (__PAGE_OFFSET >> 20);
>
> - movl $pa(pg0), %edi
> - movl %edi, pa(init_pg_tables_start)
> + movl $pa(__brk_base), %edi
> movl $pa(swapper_pg_dir), %edx
> movl $PTE_IDENT_ATTR, %eax
> 10:
> @@ -249,7 +248,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
> leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> cmpl %ebp,%eax
> jb 10b
> - movl %edi,pa(init_pg_tables_end)
> + addl $__PAGE_OFFSET, %edi
> + movl %edi, pa(_brk_end)
> shrl $12, %eax
> movl %eax, pa(max_pfn_mapped)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index ce9e888..b344908 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -114,6 +114,9 @@
>
> unsigned int boot_cpu_id __read_mostly;
>
> +static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
> +unsigned long _brk_end = (unsigned long)__brk_base;
> +
> #ifdef CONFIG_X86_64
> int default_cpu_present_to_apicid(int mps_cpu)
> {
> @@ -158,12 +161,6 @@ static struct resource bss_resource = {
>
>
> #ifdef CONFIG_X86_32
> -/* This value is set up by the early boot code to point to the value
> - immediately after the boot time page tables. It contains a *physical*
> - address, and must not be in the .bss segment! */
> -unsigned long init_pg_tables_start __initdata = ~0UL;
> -unsigned long init_pg_tables_end __initdata = ~0UL;
> -
> static struct resource video_ram_resource = {
> .name = "Video RAM area",
> .start = 0xa0000,
> @@ -219,12 +216,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
> int bootloader_type;
>
> /*
> - * Early DMI memory
> - */
> -int dmi_alloc_index;
> -char dmi_alloc_data[DMI_MAX_DATA];
> -
> -/*
> * Setup options
> */
> struct screen_info screen_info;
> @@ -337,6 +328,34 @@ static void __init relocate_initrd(void)
> }
> #endif
>
> +void * __init extend_brk(size_t size, size_t align)
> +{
> + size_t mask = align - 1;
> + void *ret;
> +
> + BUG_ON(_brk_start == 0);
> + BUG_ON(align & mask);
> +
> + _brk_end = (_brk_end + mask) & ~mask;
> + BUG_ON((char *)(_brk_end + size) > __brk_limit);
> +
> + ret = (void *)_brk_end;
> + _brk_end += size;
> +
> + memset(ret, 0, size);
> +
> + return ret;
> +}
> +
> +static void __init reserve_brk(void)
> +{
> + if (_brk_end > _brk_start)
> + reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
> +
> + /* Mark brk area as locked down and no longer taking any new
> allocations */
> + _brk_start = 0;
> +}
> +
> static void __init reserve_initrd(void)
> {
> u64 ramdisk_image = boot_params.hdr.ramdisk_image;
> @@ -717,11 +736,7 @@ void __init setup_arch(char **cmdline_p)
> init_mm.start_code = (unsigned long) _text;
> init_mm.end_code = (unsigned long) _etext;
> init_mm.end_data = (unsigned long) _edata;
> -#ifdef CONFIG_X86_32
> - init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
> -#else
> - init_mm.brk = (unsigned long) &_end;
> -#endif
> + init_mm.brk = _brk_end;
>
> code_resource.start = virt_to_phys(_text);
> code_resource.end = virt_to_phys(_etext)-1;
> @@ -842,6 +857,8 @@ void __init setup_arch(char **cmdline_p)
> setup_bios_corruption_check();
> #endif
>
> + reserve_brk();
> +
> /* max_pfn_mapped is updated here */
> max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
> max_pfn_mapped = max_low_pfn_mapped;
> diff --git a/arch/x86/kernel/vmlinux_32.lds.S
> b/arch/x86/kernel/vmlinux_32.lds.S
> index 0d86096..1063fbe 100644
> --- a/arch/x86/kernel/vmlinux_32.lds.S
> +++ b/arch/x86/kernel/vmlinux_32.lds.S
> @@ -189,10 +189,13 @@ SECTIONS
> *(.bss)
> . = ALIGN(4);
> __bss_stop = .;
> - _end = . ;
> - /* This is where the kernel creates the early boot page tables */
> +
> . = ALIGN(PAGE_SIZE);
> - pg0 = . ;
> + __brk_base = . ;
> + . += 1024 * 1024 ;
> + __brk_limit = . ;
could have more explanation about the 1M size.
because initial_pg_tables will sit in it. Please consider adding something like
in head_32.S
LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
ALLOCATOR_SLOP = 4
INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
...
+
+.section ".bss.extra_page_aligned","wa"
+ .align PAGE_SIZE_asm
+ .fill INIT_MAP_SIZE,1,0
@@ -205,6 +208,12 @@ SECTIONS
DWARF_DEBUG
}
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE")
+
#ifdef CONFIG_KEXEC
/* Link time checks */
#include <asm/kexec.h>
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
#define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-11 16:59 [GIT PULL] x86: add brk allocator for very early allocations Jeremy Fitzhardinge
2009-03-11 18:19 ` Yinghai Lu
@ 2009-03-11 19:20 ` Eric W. Biederman
2009-03-11 23:53 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 12+ messages in thread
From: Eric W. Biederman @ 2009-03-11 19:20 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Yinghai Lu, Linux Kernel Mailing List
Jeremy Fitzhardinge <jeremy@goop.org> writes:
> Aggregate patch below.
>
> The following changes since commit 11f5585820ae805c48f41c09bc260d0e51744792:
> Ingo Molnar (1):
> Merge branch 'tracing/ftrace'
>
> are available in the git repository at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>
> Jeremy Fitzhardinge (4):
> x86: make section delimiter symbols part of their section
> x86: add brk allocation for very, very early allocations
> x86-32: use brk segment for allocating initial kernel pagetable
> x86: use brk allocation for DMI
Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
extend_brk is the wrong way to go. We already have a better mechanism.
find_e820_early paired with reserve_early.
Allocating the early page tables are a very special case. There is
a case for cleaning up that mechanism and making it more comprehensible.
We should not be generalizing it, and making the kernel more fragile.
Overall I think there is a lot of good work in the patch, but taken
as a whole it seems to be moving us in the wrong direction.
Eric
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-11 19:20 ` Eric W. Biederman
@ 2009-03-11 23:53 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-11 23:53 UTC (permalink / raw)
To: Eric W. Biederman
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Yinghai Lu, Linux Kernel Mailing List
Eric W. Biederman wrote:
> extend_brk is the wrong way to go. We already have a better mechanism.
> find_e820_early paired with reserve_early.
>
No doubt that's a better option when available. But I want to allocate
earlier than that.
> Allocating the early page tables are a very special case. There is
> a case for cleaning up that mechanism and making more comprehensible.
> We should not be generalizing it, and making the kernel more fragile.
>
More fragile? I don't see that extend_brk() is a particularly fragile
mechanism. I guess a user could start over-using it and running out of
the initial space. That would fail in a fairly unambiguous way (there's
a BUG_ON checking for an attempt to extend beyond __brk_limit), and
would definitely be an abuse of the call.
My motivation for this patch is to dynamically allocate things very
early, before the e820 map is available. Specifically, I want to
dynamically allocate various Xen datastructures which would otherwise
statically waste space in the bss (in the case where you're running a
Xen-enabled kernel on a non-Xen system). It also allows me to scale
them according to the memory size, etc. I need to do this before e820
is available; indeed, I need to do it to synthesize an appropriate e820
map for the kernel to consume.
It is also nice that it generalizes head_32.S's pagetable construction,
and mops up the bespoke DMI allocator. There may well be some other
potential users. I think any static data in bss is fair game,
particularly if it is only used conditionally.
J
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-11 18:19 ` Yinghai Lu
@ 2009-03-12 23:59 ` Jeremy Fitzhardinge
2009-03-13 0:44 ` Yinghai Lu
0 siblings, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-12 23:59 UTC (permalink / raw)
To: Yinghai Lu
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Yinghai Lu wrote:
> could have more explanation about the 1M size.
> because initial_pg_tables will sit in it. please consider to add something like
>
> in head_32.S
>
> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>
> #if PTRS_PER_PMD > 1
> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
> #else
> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
> #endif
> ALLOCATOR_SLOP = 4
>
OK, how does this look:
The following changes since commit 21e8ba72daf5d7f0af33968f873499c85f96ccef:
Jeremy Fitzhardinge (1):
x86: use brk allocation for DMI
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
Jeremy Fitzhardinge (1):
x86: allow extend_brk users to reserve brk space
Yinghai Lu (1):
x86-32: compute initial mapping size more accurately
arch/x86/include/asm/page_32_types.h | 5 +++++
arch/x86/include/asm/setup.h | 30 ++++++++++++++++++++++++++++++
arch/x86/kernel/head_32.S | 4 +++-
arch/x86/kernel/setup.c | 2 ++
arch/x86/kernel/vmlinux_32.lds.S | 4 +++-
arch/x86/kernel/vmlinux_64.lds.S | 4 +++-
6 files changed, 46 insertions(+), 3 deletions(-)
git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f1e4a79..0f915ae 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
#define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+
#ifndef __ASSEMBLY__
/*
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 366d366..61b126b 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -104,6 +104,29 @@ extern struct boot_params boot_params;
extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
+/*
+ * Reserve space in the brk section. The name must be unique within
+ * the file, and somewhat descriptive. The size is in bytes. Must be
+ * used at file scope.
+ *
+ * (This uses a temp function to wrap the asm so we can pass it the
+ * size parameter; otherwise we wouldn't be able to. We can't use a
+ * "section" attribute on a normal variable because it always ends up
+ * being @progbits, which ends up allocating space in the vmlinux
+ * executable.)
+ */
+#define RESERVE_BRK(name,sz) \
+ static void __section(.discard) __used \
+ __brk_reservation_fn_##name##__(void) { \
+ asm volatile ( \
+ ".pushsection .brk_reservation,\"aw\",@nobits;" \
+ "__brk_reservation_" #name "__:" \
+ " 1:.skip %c0;" \
+ " .size __brk_reservation_" #name "__, . - 1b;" \
+ " .popsection" \
+ : : "i" (sz)); \
+ }
+
#ifdef __i386__
void __init i386_start_kernel(void);
@@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char *real_mode_data);
#endif /* __i386__ */
#endif /* _SETUP */
+#else
+#define RESERVE_BRK(name,sz) \
+ .pushsection .brk_reservation,"aw",@nobits; \
+__brk_reservation_##name##__: \
+1: .skip sz; \
+ .size __brk_reservation_##name##__,.-1b; \
+ .popsection
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d243437..80dc05e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -54,7 +54,7 @@
*
* This should be a multiple of a page.
*/
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
/*
* To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
@@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
+
/*
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
* %esi points to the real-mode code as a 32-bit pointer.
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b344908..d633958 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,8 @@
#define ARCH_SETUP
#endif
+RESERVE_BRK(dmi_alloc, 65536);
+
unsigned int boot_cpu_id __read_mostly;
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 1063fbe..4005279 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -192,7 +192,8 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__brk_base = . ;
- . += 1024 * 1024 ;
+ . += 64 * 1024 ; /* 64k slop space */
+ *(.brk_reservation) /* areas brk users have reserved */
__brk_limit = . ;
_end = . ;
@@ -201,6 +202,7 @@ SECTIONS
/* Sections to be discarded */
/DISCARD/ : {
*(.exitcall.exit)
+ *(.discard)
}
STABS_DEBUG
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index b8b83e4..47deee3 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -250,7 +250,8 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__brk_base = . ;
- . += 1024 * 1024 ;
+ . += 64 * 1024 ; /* 64k slop space */
+ *(.brk_reservation) /* areas brk users have reserved */
__brk_limit = . ;
}
@@ -260,6 +261,7 @@ SECTIONS
/DISCARD/ : {
*(.exitcall.exit)
*(.eh_frame)
+ *(.discard)
}
STABS_DEBUG
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-12 23:59 ` Jeremy Fitzhardinge
@ 2009-03-13 0:44 ` Yinghai Lu
2009-03-13 20:27 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 12+ messages in thread
From: Yinghai Lu @ 2009-03-13 0:44 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Jeremy Fitzhardinge wrote:
> Yinghai Lu wrote:
>> could have more explanation about the 1M size.
>> because initial_pg_tables will sit in it. please consider to add
>> something like
>>
>> in head_32.S
>>
>> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>
>> #if PTRS_PER_PMD > 1
>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>> #else
>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>> #endif
>> ALLOCATOR_SLOP = 4
>>
>
> OK, how does this look:
>
> The following changes since commit
> 21e8ba72daf5d7f0af33968f873499c85f96ccef:
> Jeremy Fitzhardinge (1):
> x86: use brk allocation for DMI
>
> are available in the git repository at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>
> Jeremy Fitzhardinge (1):
> x86: allow extend_brk users to reserve brk space
>
> Yinghai Lu (1):
> x86-32: compute initial mapping size more accurately
>
> arch/x86/include/asm/page_32_types.h | 5 +++++
> arch/x86/include/asm/setup.h | 30 ++++++++++++++++++++++++++++++
> arch/x86/kernel/head_32.S | 4 +++-
> arch/x86/kernel/setup.c | 2 ++
> arch/x86/kernel/vmlinux_32.lds.S | 4 +++-
> arch/x86/kernel/vmlinux_64.lds.S | 4 +++-
> 6 files changed, 46 insertions(+), 3 deletions(-)
>
> git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
> diff --git a/arch/x86/include/asm/page_32_types.h
> b/arch/x86/include/asm/page_32_types.h
> index f1e4a79..0f915ae 100644
> --- a/arch/x86/include/asm/page_32_types.h
> +++ b/arch/x86/include/asm/page_32_types.h
> @@ -39,6 +39,11 @@
> #define __VIRTUAL_MASK_SHIFT 32
> #endif /* CONFIG_X86_PAE */
>
> +/*
> + * Kernel image size is limited to 512 MB (see in
> arch/x86/kernel/head_32.S)
> + */
> +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
> +
> #ifndef __ASSEMBLY__
>
> /*
> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
> index 366d366..61b126b 100644
> --- a/arch/x86/include/asm/setup.h
> +++ b/arch/x86/include/asm/setup.h
> @@ -104,6 +104,29 @@ extern struct boot_params boot_params;
> extern unsigned long _brk_end;
> void *extend_brk(size_t size, size_t align);
>
> +/*
> + * Reserve space in the brk section. The name must be unique within
> + * the file, and somewhat descriptive. The size is in bytes. Must be
> + * used at file scope.
> + *
> + * (This uses a temp function to wrap the asm so we can pass it the
> + * size parameter; otherwise we wouldn't be able to. We can't use a
> + * "section" attribute on a normal variable because it always ends up
> + * being @progbits, which ends up allocating space in the vmlinux
> + * executable.)
> + */
> +#define RESERVE_BRK(name,sz) \
> + static void __section(.discard) __used \
> + __brk_reservation_fn_##name##__(void) { \
> + asm volatile ( \
> + ".pushsection .brk_reservation,\"aw\",@nobits;" \
> + "__brk_reservation_" #name "__:" \
> + " 1:.skip %c0;" \
> + " .size __brk_reservation_" #name "__, . - 1b;" \
> + " .popsection" \
> + : : "i" (sz)); \
> + }
> +
> #ifdef __i386__
>
> void __init i386_start_kernel(void);
> @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char
> *real_mode_data);
>
> #endif /* __i386__ */
> #endif /* _SETUP */
> +#else
> +#define RESERVE_BRK(name,sz) \
> + .pushsection .brk_reservation,"aw",@nobits; \
> +__brk_reservation_##name##__: \
> +1: .skip sz; \
> + .size __brk_reservation_##name##__,.-1b; \
> + .popsection
> #endif /* __ASSEMBLY__ */
> #endif /* __KERNEL__ */
>
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index d243437..80dc05e 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -54,7 +54,7 @@
> *
> * This should be a multiple of a page.
> */
> -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
> +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>
> /*
> * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
> @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
>
> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
no user for INIT_MAP_BEYOND_END any more.
>
> +RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
> +
> /*
> * 32-bit kernel entrypoint; only used by the boot CPU. On entry,
> * %esi points to the real-mode code as a 32-bit pointer.
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index b344908..d633958 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -112,6 +112,8 @@
> #define ARCH_SETUP
> #endif
>
> +RESERVE_BRK(dmi_alloc, 65536);
> +
> unsigned int boot_cpu_id __read_mostly;
>
> static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
> diff --git a/arch/x86/kernel/vmlinux_32.lds.S
> b/arch/x86/kernel/vmlinux_32.lds.S
> index 1063fbe..4005279 100644
> --- a/arch/x86/kernel/vmlinux_32.lds.S
> +++ b/arch/x86/kernel/vmlinux_32.lds.S
> @@ -192,7 +192,8 @@ SECTIONS
>
> . = ALIGN(PAGE_SIZE);
> __brk_base = . ;
> - . += 1024 * 1024 ;
> + . += 64 * 1024 ; /* 64k slop space */
> + *(.brk_reservation) /* areas brk users have reserved */
> __brk_limit = . ;
>
> _end = . ;
> @@ -201,6 +202,7 @@ SECTIONS
> /* Sections to be discarded */
> /DISCARD/ : {
> *(.exitcall.exit)
> + *(.discard)
> }
>
> STABS_DEBUG
could add
/*
* Build-time check on the image size:
*/
ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE")
for 32bit
YH
> diff --git a/arch/x86/kernel/vmlinux_64.lds.S
> b/arch/x86/kernel/vmlinux_64.lds.S
> index b8b83e4..47deee3 100644
> --- a/arch/x86/kernel/vmlinux_64.lds.S
> +++ b/arch/x86/kernel/vmlinux_64.lds.S
> @@ -250,7 +250,8 @@ SECTIONS
>
> . = ALIGN(PAGE_SIZE);
> __brk_base = . ;
> - . += 1024 * 1024 ;
> + . += 64 * 1024 ; /* 64k slop space */
> + *(.brk_reservation) /* areas brk users have reserved */
> __brk_limit = . ;
> }
>
> @@ -260,6 +261,7 @@ SECTIONS
> /DISCARD/ : {
> *(.exitcall.exit)
> *(.eh_frame)
> + *(.discard)
> }
>
> STABS_DEBUG
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-13 0:44 ` Yinghai Lu
@ 2009-03-13 20:27 ` Jeremy Fitzhardinge
2009-03-13 21:03 ` Yinghai Lu
2009-03-13 22:45 ` H. Peter Anvin
0 siblings, 2 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-13 20:27 UTC (permalink / raw)
To: Yinghai Lu
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Yinghai Lu wrote:
> Jeremy Fitzhardinge wrote:
>
>> Yinghai Lu wrote:
>>
>>> could have more explanation about the 1M size.
>>> because initial_pg_tables will sit in it. please consider to add
>>> something like
>>>
>>> in head_32.S
>>>
>>> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>>
>>> #if PTRS_PER_PMD > 1
>>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>>> #else
>>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>>> #endif
>>> ALLOCATOR_SLOP = 4
>>>
>>>
>> OK, how does this look:
>>
>> The following changes since commit
>> 21e8ba72daf5d7f0af33968f873499c85f96ccef:
>> Jeremy Fitzhardinge (1):
>> x86: use brk allocation for DMI
>>
>> are available in the git repository at:
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>>
>> Jeremy Fitzhardinge (1):
>> x86: allow extend_brk users to reserve brk space
>>
>> Yinghai Lu (1):
>> x86-32: compute initial mapping size more accurately
>>
>> arch/x86/include/asm/page_32_types.h | 5 +++++
>> arch/x86/include/asm/setup.h | 30 ++++++++++++++++++++++++++++++
>> arch/x86/kernel/head_32.S | 4 +++-
>> arch/x86/kernel/setup.c | 2 ++
>> arch/x86/kernel/vmlinux_32.lds.S | 4 +++-
>> arch/x86/kernel/vmlinux_64.lds.S | 4 +++-
>> 6 files changed, 46 insertions(+), 3 deletions(-)
>>
>> git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
>> diff --git a/arch/x86/include/asm/page_32_types.h
>> b/arch/x86/include/asm/page_32_types.h
>> index f1e4a79..0f915ae 100644
>> --- a/arch/x86/include/asm/page_32_types.h
>> +++ b/arch/x86/include/asm/page_32_types.h
>> @@ -39,6 +39,11 @@
>> #define __VIRTUAL_MASK_SHIFT 32
>> #endif /* CONFIG_X86_PAE */
>>
>> +/*
>> + * Kernel image size is limited to 512 MB (see in
>> arch/x86/kernel/head_32.S)
>> + */
>> +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
>> +
>> #ifndef __ASSEMBLY__
>>
>> /*
>> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
>> index 366d366..61b126b 100644
>> --- a/arch/x86/include/asm/setup.h
>> +++ b/arch/x86/include/asm/setup.h
>> @@ -104,6 +104,29 @@ extern struct boot_params boot_params;
>> extern unsigned long _brk_end;
>> void *extend_brk(size_t size, size_t align);
>>
>> +/*
>> + * Reserve space in the brk section. The name must be unique within
>> + * the file, and somewhat descriptive. The size is in bytes. Must be
>> + * used at file scope.
>> + *
>> + * (This uses a temp function to wrap the asm so we can pass it the
>> + * size parameter; otherwise we wouldn't be able to. We can't use a
>> + * "section" attribute on a normal variable because it always ends up
>> + * being @progbits, which ends up allocating space in the vmlinux
>> + * executable.)
>> + */
>> +#define RESERVE_BRK(name,sz) \
>> + static void __section(.discard) __used \
>> + __brk_reservation_fn_##name##__(void) { \
>> + asm volatile ( \
>> + ".pushsection .brk_reservation,\"aw\",@nobits;" \
>> + "__brk_reservation_" #name "__:" \
>> + " 1:.skip %c0;" \
>> + " .size __brk_reservation_" #name "__, . - 1b;" \
>> + " .popsection" \
>> + : : "i" (sz)); \
>> + }
>> +
>> #ifdef __i386__
>>
>> void __init i386_start_kernel(void);
>> @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char
>> *real_mode_data);
>>
>> #endif /* __i386__ */
>> #endif /* _SETUP */
>> +#else
>> +#define RESERVE_BRK(name,sz) \
>> + .pushsection .brk_reservation,"aw",@nobits; \
>> +__brk_reservation_##name##__: \
>> +1: .skip sz; \
>> + .size __brk_reservation_##name##__,.-1b; \
>> + .popsection
>> #endif /* __ASSEMBLY__ */
>> #endif /* __KERNEL__ */
>>
>> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>> index d243437..80dc05e 100644
>> --- a/arch/x86/kernel/head_32.S
>> +++ b/arch/x86/kernel/head_32.S
>> @@ -54,7 +54,7 @@
>> *
>> * This should be a multiple of a page.
>> */
>> -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
>> +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>
>> /*
>> * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
>> @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
>>
>> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>>
>
> no user for INIT_MAP_BEYOND_END any more.
>
There are several remaining references:
: abulafia:pts/0; grep INIT_MAP_BEYOND_END arch/x86/kernel/head_32.S
INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
* and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
* End condition: we must map up to and including INIT_MAP_BEYOND_END
leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
* End condition: we must map up to and including INIT_MAP_BEYOND_END
leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
Are you saying they're redundant and should be removed?
> could add
> /*
> * Build-time check on the image size:
> */
> ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
> "kernel image bigger than KERNEL_IMAGE_SIZE")
>
>
> for 32bit
>
I guess we could, but it doesn't seem very urgent.
J
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-13 20:27 ` Jeremy Fitzhardinge
@ 2009-03-13 21:03 ` Yinghai Lu
2009-03-13 22:45 ` H. Peter Anvin
1 sibling, 0 replies; 12+ messages in thread
From: Yinghai Lu @ 2009-03-13 21:03 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
[-- Attachment #1: Type: text/plain, Size: 11785 bytes --]
Jeremy Fitzhardinge wrote:
> Yinghai Lu wrote:
>> Jeremy Fitzhardinge wrote:
>>
>>> Yinghai Lu wrote:
>>>
>>>> could have more explanation about the 1M size.
>>>> because initial_pg_tables will sit in it. please consider to add
>>>> something like
>>>>
>>>> in head_32.S
>>>>
>>>> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>>>
>>>> #if PTRS_PER_PMD > 1
>>>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>>>> #else
>>>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>>>> #endif
>>>> ALLOCATOR_SLOP = 4
>>>>
>>> OK, how does this look:
>>>
>>> The following changes since commit
>>> 21e8ba72daf5d7f0af33968f873499c85f96ccef:
>>> Jeremy Fitzhardinge (1):
>>> x86: use brk allocation for DMI
>>>
>>> are available in the git repository at:
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git
>>> push/x86/brk
>>>
>>> Jeremy Fitzhardinge (1):
>>> x86: allow extend_brk users to reserve brk space
>>>
>>> Yinghai Lu (1):
>>> x86-32: compute initial mapping size more accurately
>>>
>>> arch/x86/include/asm/page_32_types.h | 5 +++++
>>> arch/x86/include/asm/setup.h | 30
>>> ++++++++++++++++++++++++++++++
>>> arch/x86/kernel/head_32.S | 4 +++-
>>> arch/x86/kernel/setup.c | 2 ++
>>> arch/x86/kernel/vmlinux_32.lds.S | 4 +++-
>>> arch/x86/kernel/vmlinux_64.lds.S | 4 +++-
>>> 6 files changed, 46 insertions(+), 3 deletions(-)
>>>
>>> git diff 21e8ba72daf5d7f0af33968f873499c85f96ccef..push/x86/brk
>>> diff --git a/arch/x86/include/asm/page_32_types.h
>>> b/arch/x86/include/asm/page_32_types.h
>>> index f1e4a79..0f915ae 100644
>>> --- a/arch/x86/include/asm/page_32_types.h
>>> +++ b/arch/x86/include/asm/page_32_types.h
>>> @@ -39,6 +39,11 @@
>>> #define __VIRTUAL_MASK_SHIFT 32
>>> #endif /* CONFIG_X86_PAE */
>>>
>>> +/*
>>> + * Kernel image size is limited to 512 MB (see in
>>> arch/x86/kernel/head_32.S)
>>> + */
>>> +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
>>> +
>>> #ifndef __ASSEMBLY__
>>>
>>> /*
>>> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
>>> index 366d366..61b126b 100644
>>> --- a/arch/x86/include/asm/setup.h
>>> +++ b/arch/x86/include/asm/setup.h
>>> @@ -104,6 +104,29 @@ extern struct boot_params boot_params;
>>> extern unsigned long _brk_end;
>>> void *extend_brk(size_t size, size_t align);
>>>
>>> +/*
>>> + * Reserve space in the brk section. The name must be unique within
>>> + * the file, and somewhat descriptive. The size is in bytes. Must be
>>> + * used at file scope.
>>> + *
>>> + * (This uses a temp function to wrap the asm so we can pass it the
>>> + * size parameter; otherwise we wouldn't be able to. We can't use a
>>> + * "section" attribute on a normal variable because it always ends up
>>> + * being @progbits, which ends up allocating space in the vmlinux
>>> + * executable.)
>>> + */
>>> +#define RESERVE_BRK(name,sz) \
>>> + static void __section(.discard) __used \
>>> + __brk_reservation_fn_##name##__(void) { \
>>> + asm volatile ( \
>>> + ".pushsection .brk_reservation,\"aw\",@nobits;" \
>>> + "__brk_reservation_" #name "__:" \
>>> + " 1:.skip %c0;" \
>>> + " .size __brk_reservation_" #name "__, . - 1b;" \
>>> + " .popsection" \
>>> + : : "i" (sz)); \
>>> + }
>>> +
>>> #ifdef __i386__
>>>
>>> void __init i386_start_kernel(void);
>>> @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char
>>> *real_mode_data);
>>>
>>> #endif /* __i386__ */
>>> #endif /* _SETUP */
>>> +#else
>>> +#define RESERVE_BRK(name,sz) \
>>> + .pushsection .brk_reservation,"aw",@nobits; \
>>> +__brk_reservation_##name##__: \
>>> +1: .skip sz; \
>>> + .size __brk_reservation_##name##__,.-1b; \
>>> + .popsection
>>> #endif /* __ASSEMBLY__ */
>>> #endif /* __KERNEL__ */
>>>
>>> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>>> index d243437..80dc05e 100644
>>> --- a/arch/x86/kernel/head_32.S
>>> +++ b/arch/x86/kernel/head_32.S
>>> @@ -54,7 +54,7 @@
>>> *
>>> * This should be a multiple of a page.
>>> */
>>> -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
>>> +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>>
>>> /*
>>> * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
>>> @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4
>>>
>>> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>>>
>>
>> no user for INIT_MAP_BEYOND_END any more.
>>
>
> There are several remaining references:
>
> : abulafia:pts/0; grep INIT_MAP_BEYOND_END arch/x86/kernel/head_32.S
> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
> * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
> * End condition: we must map up to and including INIT_MAP_BEYOND_END
> leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> * End condition: we must map up to and including INIT_MAP_BEYOND_END
> leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>
> Are you saying they're redundant and should be removed?
please check attached ...
Impact: cleanup
Don't use ram after _end blindly for pagetables. aka init pages is before _end
put those pg table into .bss
v2: keep initial page table up to 512M only.
v4: put initial page tables just before _end
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/include/asm/page_32_types.h | 5 +++
arch/x86/kernel/head32.c | 3 +
arch/x86/kernel/head_32.S | 55 ++++++++++++++---------------------
arch/x86/kernel/vmlinux_32.lds.S | 11 ++++++-
4 files changed, 40 insertions(+), 34 deletions(-)
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -18,7 +18,8 @@ void __init i386_start_kernel(void)
{
reserve_trampoline_memory();
- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop),
+ "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
/*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- * - one bit for each possible page, but only in low memory, which means
- * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
* - enough space to map all low memory, which means
- * (2^32/4096) / 1024 pages (worst case, non PAE)
- * (2^32/4096) / 512 + 4 pages (worst case for PAE)
- * - a few pages for allocator use before the kernel pagetable has
- * been set up
+ * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
*/
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
/*
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
@@ -166,10 +154,9 @@ num_subarch_entries = (. - subarch_entri
/*
* Initialize page tables. This creates a PDE and a set of page
- * tables, which are located immediately beyond _end. The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * tables, which are located immediately beyond _end.
* Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end
*
* Note that the stack is not yet set up!
*/
@@ -209,14 +196,14 @@ default_entry:
loop 11b
/*
- * End condition: we must map up to and including INIT_MAP_BEYOND_END
- * bytes beyond the end of our own page tables.
+ * End condition: we must map up to the end.
*/
- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+ movl $pa(_end), %ebp
+ addl PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
1:
- movl %edi,pa(init_pg_tables_end)
+ movl %edi, pa(init_pg_tables_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@@ -242,14 +229,14 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
addl $0x1000,%eax
loop 11b
/*
- * End condition: we must map up to and including INIT_MAP_BEYOND_END
- * bytes beyond the end of our own page tables; the +0x007 is
+ * End condition: we must map up to end, the +0x007 is
* the attribute bits
*/
- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+ movl $pa(_end), %ebp
+ addl PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
- movl %edi,pa(init_pg_tables_end)
+ movl %edi, pa(init_pg_tables_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@@ -636,6 +623,10 @@ swapper_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
+
+.section ".bss.extra_page_aligned","wa"
+ .align PAGE_SIZE_asm
+ .fill INIT_MAP_SIZE,1,0
/*
* This starts the data section.
*/
Index: linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_32.lds.S
+++ linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,10 +189,13 @@ SECTIONS
*(.bss)
. = ALIGN(4);
__bss_stop = .;
- _end = . ;
+ /* extra_page_aligned must be last one before end*/
/* This is where the kernel creates the early boot page tables */
. = ALIGN(PAGE_SIZE);
pg0 = . ;
+ *(.bss.extra_page_aligned)
+ . = ALIGN(8);
+ _end = . ;
}
/* Sections to be discarded */
@@ -205,6 +208,12 @@ SECTIONS
DWARF_DEBUG
}
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE")
+
#ifdef CONFIG_KEXEC
/* Link time checks */
#include <asm/kexec.h>
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
#define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+
#ifndef __ASSEMBLY__
/*
[-- Attachment #2: Attached Message --]
[-- Type: message/rfc822, Size: 7517 bytes --]
From: Yinghai Lu <yinghai@kernel.org>
To: "H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@elte.hu>, Andrew Morton <akpm@linux-foundation.org>, Thomas Gleixner <tglx@linutronix.de>, Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] x86: put initial_pg_tables into .bss -v4
Date: Mon, 09 Mar 2009 01:15:57 -0700
Message-ID: <49B4D03D.7030205@kernel.org>
Impact: cleanup
Don't use ram after _end blindly for pagetables. aka init pages is before _end
put those pg table into .bss
v2: keep initial page table up to 512M only.
v4: put initial page tables just before _end
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/include/asm/page_32_types.h | 5 +++
arch/x86/kernel/head32.c | 3 +
arch/x86/kernel/head_32.S | 55 ++++++++++++++---------------------
arch/x86/kernel/vmlinux_32.lds.S | 11 ++++++-
4 files changed, 40 insertions(+), 34 deletions(-)
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -18,7 +18,8 @@ void __init i386_start_kernel(void)
{
reserve_trampoline_memory();
- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop),
+ "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
/*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- * - one bit for each possible page, but only in low memory, which means
- * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
* - enough space to map all low memory, which means
- * (2^32/4096) / 1024 pages (worst case, non PAE)
- * (2^32/4096) / 512 + 4 pages (worst case for PAE)
- * - a few pages for allocator use before the kernel pagetable has
- * been set up
+ * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
*/
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
/*
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
@@ -166,10 +154,9 @@ num_subarch_entries = (. - subarch_entri
/*
* Initialize page tables. This creates a PDE and a set of page
- * tables, which are located immediately beyond _end. The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * tables, which are located immediately beyond _end.
* Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end
*
* Note that the stack is not yet set up!
*/
@@ -209,14 +196,14 @@ default_entry:
loop 11b
/*
- * End condition: we must map up to and including INIT_MAP_BEYOND_END
- * bytes beyond the end of our own page tables.
+ * End condition: we must map up to the end.
*/
- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+ movl $pa(_end), %ebp
+ addl PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
1:
- movl %edi,pa(init_pg_tables_end)
+ movl %edi, pa(init_pg_tables_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@@ -242,14 +229,14 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
addl $0x1000,%eax
loop 11b
/*
- * End condition: we must map up to and including INIT_MAP_BEYOND_END
- * bytes beyond the end of our own page tables; the +0x007 is
+ * End condition: we must map up to end, the +0x007 is
* the attribute bits
*/
- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+ movl $pa(_end), %ebp
+ addl PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
- movl %edi,pa(init_pg_tables_end)
+ movl %edi, pa(init_pg_tables_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@@ -636,6 +623,10 @@ swapper_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
+
+.section ".bss.extra_page_aligned","wa"
+ .align PAGE_SIZE_asm
+ .fill INIT_MAP_SIZE,1,0
/*
* This starts the data section.
*/
Index: linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_32.lds.S
+++ linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,10 +189,13 @@ SECTIONS
*(.bss)
. = ALIGN(4);
__bss_stop = .;
- _end = . ;
+ /* extra_page_aligned must be the last one before end */
/* This is where the kernel creates the early boot page tables */
. = ALIGN(PAGE_SIZE);
pg0 = . ;
+ *(.bss.extra_page_aligned)
+ . = ALIGN(8);
+ _end = . ;
}
/* Sections to be discarded */
@@ -205,6 +208,12 @@ SECTIONS
DWARF_DEBUG
}
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE")
+
#ifdef CONFIG_KEXEC
/* Link time checks */
#include <asm/kexec.h>
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
#define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+
#ifndef __ASSEMBLY__
/*
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-13 20:27 ` Jeremy Fitzhardinge
2009-03-13 21:03 ` Yinghai Lu
@ 2009-03-13 22:45 ` H. Peter Anvin
2009-03-13 22:59 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 12+ messages in thread
From: H. Peter Anvin @ 2009-03-13 22:45 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Yinghai Lu, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Jeremy Fitzhardinge wrote:
>>>
>>> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>>>
>>
>> no user for INIT_MAP_BEYOND_END any more.
>>
>
> There are several remaining references:
>
> : abulafia:pts/0; grep INIT_MAP_BEYOND_END arch/x86/kernel/head_32.S
> INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
> * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
> * End condition: we must map up to and including INIT_MAP_BEYOND_END
> leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> * End condition: we must map up to and including INIT_MAP_BEYOND_END
> leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>
> Are you saying they're redundant and should be removed?
>
Those references are just the actual reservation of memory. If all the
users of that memory are converted to either brk or bss,
INIT_MAP_BEYOND_END should be removed. If all the users of that memory
aren't converted to brk or bss, we should do so.
-hpa
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-13 22:45 ` H. Peter Anvin
@ 2009-03-13 22:59 ` Jeremy Fitzhardinge
2009-03-13 23:20 ` Yinghai Lu
0 siblings, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-13 22:59 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Yinghai Lu, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
H. Peter Anvin wrote:
> Those references are just the actual reservation of memory. If all
> the users of that memory are converted to either brk or bss,
> INIT_MAP_BEYOND_END should be removed. If all the users of that
> memory aren't converted to brk to bss, we should do so.
I just added an adapted Yinghai's patch and added it to push/x86/brk.
J
The following changes since commit 8131667360004a0b74e4dcadfee8a18d4e2b074f:
Jeremy Fitzhardinge (1):
x86: allow extend_brk users to reserve brk space
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
Yinghai Lu (1):
x86: put initial_pg_tables into .bss -v4
arch/x86/kernel/head_32.S | 47 ++++++++++++++-----------------------
arch/x86/kernel/vmlinux_32.lds.S | 6 +++++
2 files changed, 24 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 80dc05e..ad7dbbb 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
/*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- * - one bit for each possible page, but only in low memory, which means
- * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
* - enough space to map all low memory, which means
- * (2^32/4096) / 1024 pages (worst case, non PAE)
- * (2^32/4096) / 512 + 4 pages (worst case for PAE)
- * - a few pages for allocator use before the kernel pagetable has
- * been set up
+ * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and smaller than max_low_pfn, otherwise it will waste some page table entries
*/
LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
-
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
@@ -168,10 +156,10 @@ num_subarch_entries = (. - subarch_entries) / 4
/*
* Initialize page tables. This creates a PDE and a set of page
- * tables, which are located immediately beyond _end. The variable
+ * tables, which are located immediately beyond __brk_base. The variable
* _brk_end is set up to point to the first "safe" location.
* Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end.
*
* Note that the stack is not yet set up!
*/
@@ -210,10 +198,9 @@ default_entry:
loop 11b
/*
- * End condition: we must map up to and including INIT_MAP_BEYOND_END
- * bytes beyond the end of our own page tables.
+ * End condition: we must map up to the end.
*/
- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+ movl $pa(_end) + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
1:
@@ -243,11 +230,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
addl $0x1000,%eax
loop 11b
/*
- * End condition: we must map up to and including INIT_MAP_BEYOND_END
- * bytes beyond the end of our own page tables; the +0x007 is
- * the attribute bits
+ * End condition: we must map up to end
*/
- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+ movl $pa(_end) + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
addl $__PAGE_OFFSET, %edi
@@ -638,6 +623,10 @@ swapper_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
+
+.section ".bss.extra_page_aligned","wa"
+ .align PAGE_SIZE_asm
+ .fill INIT_MAP_SIZE,1,0
/*
* This starts the data section.
*/
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 4005279..c318dee 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -210,6 +210,12 @@ SECTIONS
DWARF_DEBUG
}
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE")
+
#ifdef CONFIG_KEXEC
/* Link time checks */
#include <asm/kexec.h>
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-13 22:59 ` Jeremy Fitzhardinge
@ 2009-03-13 23:20 ` Yinghai Lu
2009-03-14 0:23 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 12+ messages in thread
From: Yinghai Lu @ 2009-03-13 23:20 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> Those references are just the actual reservation of memory. If all
>> the users of that memory are converted to either brk or bss,
>> INIT_MAP_BEYOND_END should be removed. If all the users of that
>> memory aren't converted to brk to bss, we should do so.
>
> I just added an adapted Yinghai's patch and added it to push/x86/brk.
>
> J
>
>
> The following changes since commit
> 8131667360004a0b74e4dcadfee8a18d4e2b074f:
> Jeremy Fitzhardinge (1):
> x86: allow extend_brk users to reserve brk space
>
> are available in the git repository at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>
> Yinghai Lu (1):
> x86: put initial_pg_tables into .bss -v4
>
> arch/x86/kernel/head_32.S | 47
> ++++++++++++++-----------------------
> arch/x86/kernel/vmlinux_32.lds.S | 6 +++++
> 2 files changed, 24 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index 80dc05e..ad7dbbb 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -38,42 +38,30 @@
> #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
>
> /*
> - * This is how much memory *in addition to the memory covered up to
> - * and including _end* we need mapped initially.
> - * We need:
> - * - one bit for each possible page, but only in low memory, which means
> - * 2^32/4096/8 = 128K worst case (4G/4G split.)
> + * This is how much memory for page table to and including _end
> + * we need mapped initially.
> * - enough space to map all low memory, which means
> - * (2^32/4096) / 1024 pages (worst case, non PAE)
> - * (2^32/4096) / 512 + 4 pages (worst case for PAE)
> - * - a few pages for allocator use before the kernel pagetable has
> - * been set up
> + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
> + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
> *
> * Modulo rounding, each megabyte assigned here requires a kilobyte of
> * memory, which is currently unreclaimed.
> *
> * This should be a multiple of a page.
> + *
> + * KERNEL_IMAGE_SIZE should be greater than pa(_end)
> + * and small than max_low_pfn, otherwise will waste some page table
> entries
> */
> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>
> -/*
> - * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
> - * pagetables from above the 16MB DMA limit, so we'll have to set
> - * up pagetables 16MB more (worst-case):
> - */
> -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
> -LOW_PAGES = LOW_PAGES + 0x1000000
> -#endif
> -
> #if PTRS_PER_PMD > 1
> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
> #else
> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
> #endif
> -BOOTBITMAP_SIZE = LOW_PAGES / 8
> ALLOCATOR_SLOP = 4
>
> -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
> ALLOCATOR_SLOP)*PAGE_SIZE_asm
> +INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
>
> RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
use INIT_MAP_SIZE in RESERVE_BRK directly?
>
> @@ -168,10 +156,10 @@ num_subarch_entries = (. - subarch_entries) / 4
>
> /*
> * Initialize page tables. This creates a PDE and a set of page
> - * tables, which are located immediately beyond _end. The variable
> + * tables, which are located immediately beyond __brk_base. The variable
> * _brk_end is set up to point to the first "safe" location.
> * Mappings are created both at virtual address 0 (identity mapping)
> - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
> + * and PAGE_OFFSET for up to _end.
> *
> * Note that the stack is not yet set up!
> */
> @@ -210,10 +198,9 @@ default_entry:
> loop 11b
>
> /*
> - * End condition: we must map up to and including INIT_MAP_BEYOND_END
> - * bytes beyond the end of our own page tables.
> + * End condition: we must map up to the end.
> */
> - leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> + movl $pa(_end) + PTE_IDENT_ATTR, %ebp
> cmpl %ebp,%eax
> jb 10b
> 1:
> @@ -243,11 +230,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
> addl $0x1000,%eax
> loop 11b
> /*
> - * End condition: we must map up to and including INIT_MAP_BEYOND_END
> - * bytes beyond the end of our own page tables; the +0x007 is
> - * the attribute bits
> + * End condition: we must map up to end
> */
> - leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
> + movl $pa(_end) + PTE_IDENT_ATTR, %ebp
> cmpl %ebp,%eax
> jb 10b
> addl $__PAGE_OFFSET, %edi
> @@ -638,6 +623,10 @@ swapper_pg_fixmap:
> .fill 1024,4,0
> ENTRY(empty_zero_page)
> .fill 4096,1,0
> +
> +.section ".bss.extra_page_aligned","wa"
> + .align PAGE_SIZE_asm
> + .fill INIT_MAP_SIZE,1,0
this 4 lines are not needed. you already had RESERVE_BRK for it.
YH
> /*
> * This starts the data section.
> */
> diff --git a/arch/x86/kernel/vmlinux_32.lds.S
> b/arch/x86/kernel/vmlinux_32.lds.S
> index 4005279..c318dee 100644
> --- a/arch/x86/kernel/vmlinux_32.lds.S
> +++ b/arch/x86/kernel/vmlinux_32.lds.S
> @@ -210,6 +210,12 @@ SECTIONS
> DWARF_DEBUG
> }
>
> +/*
> + * Build-time check on the image size:
> + */
> +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
> + "kernel image bigger than KERNEL_IMAGE_SIZE")
> +
> #ifdef CONFIG_KEXEC
> /* Link time checks */
> #include <asm/kexec.h>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [GIT PULL] x86: add brk allocator for very early allocations
2009-03-13 23:20 ` Yinghai Lu
@ 2009-03-14 0:23 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-14 0:23 UTC (permalink / raw)
To: Yinghai Lu
Cc: H. Peter Anvin, Ingo Molnar, the arch/x86 maintainers,
Eric W. Biederman, Linux Kernel Mailing List
Yinghai Lu wrote:
> Jeremy Fitzhardinge wrote:
>
>> H. Peter Anvin wrote:
>>
>>> Those references are just the actual reservation of memory. If all
>>> the users of that memory are converted to either brk or bss,
>>> INIT_MAP_BEYOND_END should be removed. If all the users of that
>>> memory aren't converted to brk to bss, we should do so.
>>>
>> I just added an adapted Yinghai's patch and added it to push/x86/brk.
>>
>> J
>>
>>
>> The following changes since commit
>> 8131667360004a0b74e4dcadfee8a18d4e2b074f:
>> Jeremy Fitzhardinge (1):
>> x86: allow extend_brk users to reserve brk space
>>
>> are available in the git repository at:
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push/x86/brk
>>
>> Yinghai Lu (1):
>> x86: put initial_pg_tables into .bss -v4
>>
>> arch/x86/kernel/head_32.S | 47
>> ++++++++++++++-----------------------
>> arch/x86/kernel/vmlinux_32.lds.S | 6 +++++
>> 2 files changed, 24 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>> index 80dc05e..ad7dbbb 100644
>> --- a/arch/x86/kernel/head_32.S
>> +++ b/arch/x86/kernel/head_32.S
>> @@ -38,42 +38,30 @@
>> #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
>>
>> /*
>> - * This is how much memory *in addition to the memory covered up to
>> - * and including _end* we need mapped initially.
>> - * We need:
>> - * - one bit for each possible page, but only in low memory, which means
>> - * 2^32/4096/8 = 128K worst case (4G/4G split.)
>> + * This is how much memory for page table to and including _end
>> + * we need mapped initially.
>> * - enough space to map all low memory, which means
>> - * (2^32/4096) / 1024 pages (worst case, non PAE)
>> - * (2^32/4096) / 512 + 4 pages (worst case for PAE)
>> - * - a few pages for allocator use before the kernel pagetable has
>> - * been set up
>> + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
>> + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
>> *
>> * Modulo rounding, each megabyte assigned here requires a kilobyte of
>> * memory, which is currently unreclaimed.
>> *
>> * This should be a multiple of a page.
>> + *
>> + * KERNEL_IMAGE_SIZE should be greater than pa(_end)
>> + * and small than max_low_pfn, otherwise will waste some page table
>> entries
>> */
>> LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
>>
>> -/*
>> - * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
>> - * pagetables from above the 16MB DMA limit, so we'll have to set
>> - * up pagetables 16MB more (worst-case):
>> - */
>> -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
>> -LOW_PAGES = LOW_PAGES + 0x1000000
>> -#endif
>> -
>> #if PTRS_PER_PMD > 1
>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
>> #else
>> PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
>> #endif
>> -BOOTBITMAP_SIZE = LOW_PAGES / 8
>> ALLOCATOR_SLOP = 4
>>
>> -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE +
>> ALLOCATOR_SLOP)*PAGE_SIZE_asm
>> +INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
>>
>> RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE)
>>
>
> use INIT_MAP_SIZE in RESERVE_BRK directly?
>
OK.
>
>> @@ -168,10 +156,10 @@ num_subarch_entries = (. - subarch_entries) / 4
>>
>> /*
>> * Initialize page tables. This creates a PDE and a set of page
>> - * tables, which are located immediately beyond _end. The variable
>> + * tables, which are located immediately beyond __brk_base. The variable
>> * _brk_end is set up to point to the first "safe" location.
>> * Mappings are created both at virtual address 0 (identity mapping)
>> - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
>> + * and PAGE_OFFSET for up to _end.
>> *
>> * Note that the stack is not yet set up!
>> */
>> @@ -210,10 +198,9 @@ default_entry:
>> loop 11b
>>
>> /*
>> - * End condition: we must map up to and including INIT_MAP_BEYOND_END
>> - * bytes beyond the end of our own page tables.
>> + * End condition: we must map up to the end.
>> */
>> - leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>> + movl $pa(_end) + PTE_IDENT_ATTR, %ebp
>> cmpl %ebp,%eax
>> jb 10b
>> 1:
>> @@ -243,11 +230,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
>> addl $0x1000,%eax
>> loop 11b
>> /*
>> - * End condition: we must map up to and including INIT_MAP_BEYOND_END
>> - * bytes beyond the end of our own page tables; the +0x007 is
>> - * the attribute bits
>> + * End condition: we must map up to end
>> */
>> - leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
>> + movl $pa(_end) + PTE_IDENT_ATTR, %ebp
>> cmpl %ebp,%eax
>> jb 10b
>> addl $__PAGE_OFFSET, %edi
>> @@ -638,6 +623,10 @@ swapper_pg_fixmap:
>> .fill 1024,4,0
>> ENTRY(empty_zero_page)
>> .fill 4096,1,0
>> +
>> +.section ".bss.extra_page_aligned","wa"
>> + .align PAGE_SIZE_asm
>> + .fill INIT_MAP_SIZE,1,0
>>
>
> this 4 lines are not needed. you already had RESERVE_BRK for it.
>
>
OK.
I've updated the changeset and repushed.
J
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2009-03-14 0:53 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-11 16:59 [GIT PULL] x86: add brk allocator for very early allocations Jeremy Fitzhardinge
2009-03-11 18:19 ` Yinghai Lu
2009-03-12 23:59 ` Jeremy Fitzhardinge
2009-03-13 0:44 ` Yinghai Lu
2009-03-13 20:27 ` Jeremy Fitzhardinge
2009-03-13 21:03 ` Yinghai Lu
2009-03-13 22:45 ` H. Peter Anvin
2009-03-13 22:59 ` Jeremy Fitzhardinge
2009-03-13 23:20 ` Yinghai Lu
2009-03-14 0:23 ` Jeremy Fitzhardinge
2009-03-11 19:20 ` Eric W. Biederman
2009-03-11 23:53 ` Jeremy Fitzhardinge
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.