* [PATCH -v7 1/3] x86 boot: setup data
@ 2007-10-23 8:06 Huang, Ying
2007-10-23 22:08 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 11+ messages in thread
From: Huang, Ying @ 2007-10-23 8:06 UTC (permalink / raw)
To: H. Peter Anvin, Andi Kleen, Eric W. Biederman, akpm, Linus Torvalds
Cc: linux-kernel
This patch add a field of 64-bit physical pointer to NULL terminated
single linked list of struct setup_data to real-mode kernel
header. This is used as a more extensible boot parameters passing
mechanism.
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
arch/x86/boot/header.S | 6 +++++-
arch/x86/kernel/e820_64.c | 6 +++---
arch/x86/kernel/head64.c | 26 ++++++++++++++++++++++++++
arch/x86/kernel/head_32.S | 37 ++++++++++++++++++++++++++++++++++++-
arch/x86/kernel/setup_32.c | 25 +++++++++++++++++++++++--
arch/x86/kernel/setup_64.c | 22 ++++++++++++++++++++--
arch/x86/mm/discontig_32.c | 3 ++-
include/asm-x86/bootparam.h | 12 ++++++++++++
include/asm-x86/e820_64.h | 1 +
include/asm-x86/setup_32.h | 7 +++++++
include/asm-x86/setup_64.h | 2 ++
11 files changed, 137 insertions(+), 10 deletions(-)
Index: linux-2.6/include/asm-x86/bootparam.h
===================================================================
--- linux-2.6.orig/include/asm-x86/bootparam.h 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/include/asm-x86/bootparam.h 2007-10-23 10:48:48.000000000 +0800
@@ -9,6 +9,17 @@
#include <asm/ist.h>
#include <video/edid.h>
+/* setup data types */
+#define SETUP_NONE 0
+
+/* extensible setup data list node */
+struct setup_data {
+ u64 next;
+ u32 type;
+ u32 len;
+ u8 data[0];
+};
+
struct setup_header {
u8 setup_sects;
u16 root_flags;
@@ -46,6 +57,7 @@
u32 cmdline_size;
u32 hardware_subarch;
u64 hardware_subarch_data;
+ u64 setup_data;
} __attribute__((packed));
struct sys_desc_table {
Index: linux-2.6/arch/x86/boot/header.S
===================================================================
--- linux-2.6.orig/arch/x86/boot/header.S 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/boot/header.S 2007-10-23 13:51:29.000000000 +0800
@@ -119,7 +119,7 @@
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x0207 # header version number (>= 0x0105)
+ .word 0x0208 # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -219,6 +219,10 @@
hardware_subarch_data: .quad 0
+setup_data: .quad 0 # 64-bit physical pointer to
+ # single linked list of
+ # struct setup_data
+
# End of setup header #####################################################
.section ".inittext", "ax"
Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/kernel/setup_64.c 2007-10-23 10:48:48.000000000 +0800
@@ -247,6 +247,22 @@
ebda_size = 64*1024;
}
+static void __init parse_setup_data(void)
+{
+ struct setup_data *data;
+
+ if (boot_params.hdr.version < 0x0207)
+ return;
+ for (data = __va(boot_params.hdr.setup_data);
+ data != __va(0);
+ data = __va(data->next)) {
+ switch (data->type) {
+ default:
+ break;
+ }
+ }
+}
+
void __init setup_arch(char **cmdline_p)
{
printk(KERN_INFO "Command line: %s\n", boot_command_line);
@@ -282,6 +298,8 @@
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
+ parse_setup_data();
+
parse_early_param();
finish_e820_parsing();
@@ -340,9 +358,9 @@
reserve_bootmem_generic(table_start << PAGE_SHIFT,
(table_end - table_start) << PAGE_SHIFT);
- /* reserve kernel */
+ /* reserve kernel and setup data */
reserve_bootmem_generic(__pa_symbol(&_text),
- __pa_symbol(&_end) - __pa_symbol(&_text));
+ setup_data_end - __pa_symbol(&_text));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/kernel/setup_32.c 2007-10-23 13:51:05.000000000 +0800
@@ -66,6 +66,8 @@
address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;
+unsigned long setup_data_len __initdata;
+
int disable_pse __devinitdata = 0;
/*
@@ -327,7 +329,7 @@
* partially used pages are not usable - thus
* we are rounding upwards:
*/
- min_low_pfn = PFN_UP(init_pg_tables_end);
+ min_low_pfn = PFN_UP(init_pg_tables_end+setup_data_len);
find_max_pfn();
@@ -532,6 +534,22 @@
return machine_specific_memory_setup();
}
+static void __init parse_setup_data(void)
+{
+ struct setup_data *data;
+
+ if (boot_params.hdr.version < 0x0207)
+ return;
+ for (data = __va(boot_params.hdr.setup_data);
+ data != __va(0);
+ data = __va(data->next)) {
+ switch (data->type) {
+ default:
+ break;
+ }
+ }
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -579,6 +597,9 @@
rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
+
+ parse_setup_data();
+
ARCH_SETUP
if (efi_enabled)
efi_init();
@@ -594,7 +615,7 @@
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
+ init_mm.brk = init_pg_tables_end + setup_data_len + PAGE_OFFSET;
code_resource.start = virt_to_phys(_text);
code_resource.end = virt_to_phys(_etext)-1;
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/kernel/e820_64.c 2007-10-23 10:01:39.000000000 +0800
@@ -79,9 +79,9 @@
}
}
#endif
- /* kernel code */
- if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
- *addrp = PAGE_ALIGN(__pa_symbol(&_end));
+ /* kernel code and setup data */
+ if (last >= __pa_symbol(&_text) && addr < setup_data_end) {
+ *addrp = PAGE_ALIGN(setup_data_end);
return 1;
}
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/kernel/head64.c 2007-10-23 10:01:39.000000000 +0800
@@ -19,6 +19,9 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/io.h>
+
+unsigned long __initdata setup_data_end;
static void __init zap_identity_mappings(void)
{
@@ -38,12 +41,35 @@
static void __init copy_bootdata(char *real_mode_data)
{
char * command_line;
+ void *sdata_dst;
+ struct setup_data *sdata;
+ u64 *ppa_next;
+ unsigned long sdata_len;
memcpy(&boot_params, real_mode_data, sizeof boot_params);
if (boot_params.hdr.cmd_line_ptr) {
command_line = __va(boot_params.hdr.cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
+
+#define SETUP_DATA_ALIGN(addr) \
+ (((unsigned long)(addr)+(SETUP_DATA_ALIGN_SIZE-1))& \
+ ~(SETUP_DATA_ALIGN_SIZE-1))
+
+ sdata_dst = __va(SETUP_DATA_ALIGN(__pa_symbol(&_end)));
+ ppa_next = &boot_params.hdr.setup_data;
+ while (*ppa_next) {
+ sdata = early_ioremap(*ppa_next, sizeof(*sdata));
+ sdata_len = sizeof(*sdata) + sdata->len;
+ early_iounmap(sdata, sizeof(*sdata));
+ sdata = early_ioremap(*ppa_next, sdata_len);
+ memcpy(sdata_dst, sdata, sdata_len);
+ early_iounmap(sdata, sdata_len);
+ *ppa_next = __pa(sdata_dst);
+ ppa_next = &((struct setup_data *)sdata_dst)->next;
+ sdata_dst = (void *)SETUP_DATA_ALIGN(sdata_dst+sdata_len);
+ }
+ setup_data_end = __pa(sdata_dst);
}
void __init x86_64_start_kernel(char * real_mode_data)
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/include/asm-x86/e820_64.h 2007-10-23 10:01:39.000000000 +0800
@@ -56,6 +56,7 @@
extern unsigned ebda_addr, ebda_size;
extern unsigned long nodemap_addr, nodemap_size;
+extern unsigned long setup_data_end;
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/kernel/head_32.S 2007-10-23 10:01:39.000000000 +0800
@@ -135,6 +135,21 @@
rep
movsl
1:
+ movl boot_params - __PAGE_OFFSET + SETUP_DATA_POINTER,%ebp
+ xorl %ecx,%ecx
+2:
+ andl %ebp,%ebp
+ jz 1f
+ movl $(SETUP_DATA_HEADER_LEN+SETUP_DATA_ALIGN_SIZE-1),%eax
+ addl (SETUP_DATA_LEN)(%ebp),%eax
+ andl $~(SETUP_DATA_ALIGN_SIZE-1),%eax
+ addl %eax, %ecx
+ movl SETUP_DATA_NEXT(%ebp),%ebp
+ jmp 2b
+1:
+ addl $(PAGE_SIZE_asm-1),%ecx
+ andl $~(PAGE_SIZE_asm-1),%ecx
+ movl %ecx,(setup_data_len - __PAGE_OFFSET)
#ifdef CONFIG_PARAVIRT
cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
@@ -191,13 +206,33 @@
stosl
addl $0x1000,%eax
loop 11b
- /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
+ /* End condition: we must map up to and including INIT_MAP_BEYOND_END + setup_data_len */
/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+ addl (setup_data_len - __PAGE_OFFSET),%ebp
cmpl %ebp,%eax
jb 10b
movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
+ /* Copy setup data */
+ leal (boot_params - __PAGE_OFFSET + SETUP_DATA_POINTER),%ebx
+2:
+ movl (%ebx),%ebp
+ andl %ebp,%ebp
+ jz 1f
+ movl $SETUP_DATA_HEADER_LEN,%ecx
+ addl SETUP_DATA_LEN(%ebp),%ecx
+ addl $(SETUP_DATA_ALIGN_SIZE-1),%edi
+ andl $~(SETUP_DATA_ALIGN_SIZE-1),%edi
+ movl %edi,(%ebx)
+ movl %ebp,%esi
+ rep
+ movsb
+ movl (%ebx),%ebx
+ leal SETUP_DATA_NEXT(%ebx),%ebx
+ jmp 2b
+1:
+
xorl %ebx,%ebx /* This is the boot CPU (BSP) */
jmp 3f
/*
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/arch/x86/mm/discontig_32.c 2007-10-23 10:01:39.000000000 +0800
@@ -282,7 +282,8 @@
kva_pages = calculate_numa_remap_pages();
/* partially used pages are not usable - thus round upwards */
- system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
+ system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end +
+ setup_data_len);
kva_start_pfn = find_max_low_pfn() - kva_pages;
Index: linux-2.6/include/asm-x86/setup_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/setup_32.h 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/include/asm-x86/setup_32.h 2007-10-23 10:01:39.000000000 +0800
@@ -25,6 +25,13 @@
#define OLD_CL_OFFSET 0x90022
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
+#define SETUP_DATA_POINTER 0x248 /* Relative to real mode data */
+
+#define SETUP_DATA_NEXT 0x0
+#define SETUP_DATA_LEN 0xc
+#define SETUP_DATA_HEADER_LEN 0x10
+#define SETUP_DATA_ALIGN_SIZE 0x8
+
#ifndef __ASSEMBLY__
#include <asm/bootparam.h>
Index: linux-2.6/include/asm-x86/setup_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/setup_64.h 2007-10-23 10:01:35.000000000 +0800
+++ linux-2.6/include/asm-x86/setup_64.h 2007-10-23 10:03:25.000000000 +0800
@@ -4,7 +4,9 @@
#define COMMAND_LINE_SIZE 2048
#ifdef __KERNEL__
+#include <linux/const.h>
+#define SETUP_DATA_ALIGN_SIZE __AC(0x8, UL)
#ifndef __ASSEMBLY__
#include <asm/bootparam.h>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 8:06 [PATCH -v7 1/3] x86 boot: setup data Huang, Ying
@ 2007-10-23 22:08 ` Jeremy Fitzhardinge
2007-10-23 22:15 ` H. Peter Anvin
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Jeremy Fitzhardinge @ 2007-10-23 22:08 UTC (permalink / raw)
To: Huang, Ying
Cc: H. Peter Anvin, Andi Kleen, Eric W. Biederman, akpm,
Linus Torvalds, linux-kernel
Huang, Ying wrote:
> This patch add a field of 64-bit physical pointer to NULL terminated
> single linked list of struct setup_data to real-mode kernel
> header. This is used as a more extensible boot parameters passing
> mechanism.
>
As a general comment, I can't say I'm thrilled about sticking the copied
setup data at the end of the initial pagetables. This is already a
fairly complex area, and changing it touches a surprisingly large number
of places. I wonder if there isn't a better way to deal with this
(possibly by fixing up the generation of the initial pagetables in the
process).
Or more simply, define a max size for this data, and copy it into an
initdata area (which, being initdata, can be quite large without wasting
lots of memory).
> Signed-off-by: Huang Ying <ying.huang@intel.com>
>
> ---
>
> arch/x86/boot/header.S | 6 +++++-
> arch/x86/kernel/e820_64.c | 6 +++---
> arch/x86/kernel/head64.c | 26 ++++++++++++++++++++++++++
> arch/x86/kernel/head_32.S | 37 ++++++++++++++++++++++++++++++++++++-
> arch/x86/kernel/setup_32.c | 25 +++++++++++++++++++++++--
> arch/x86/kernel/setup_64.c | 22 ++++++++++++++++++++--
> arch/x86/mm/discontig_32.c | 3 ++-
> include/asm-x86/bootparam.h | 12 ++++++++++++
> include/asm-x86/e820_64.h | 1 +
> include/asm-x86/setup_32.h | 7 +++++++
> include/asm-x86/setup_64.h | 2 ++
> 11 files changed, 137 insertions(+), 10 deletions(-)
>
> Index: linux-2.6/include/asm-x86/bootparam.h
> ===================================================================
> --- linux-2.6.orig/include/asm-x86/bootparam.h 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/include/asm-x86/bootparam.h 2007-10-23 10:48:48.000000000 +0800
> @@ -9,6 +9,17 @@
> #include <asm/ist.h>
> #include <video/edid.h>
>
> +/* setup data types */
> +#define SETUP_NONE 0
> +
> +/* extensible setup data list node */
> +struct setup_data {
> + u64 next;
> + u32 type;
> + u32 len;
> + u8 data[0];
> +};
>
What are the alignment rules for this structure? Is it always 64-bit
aligned? What about the relationship of len and data?
> +
> struct setup_header {
> u8 setup_sects;
> u16 root_flags;
> @@ -46,6 +57,7 @@
> u32 cmdline_size;
> u32 hardware_subarch;
> u64 hardware_subarch_data;
> + u64 setup_data;
> } __attribute__((packed));
>
> struct sys_desc_table {
> Index: linux-2.6/arch/x86/boot/header.S
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/header.S 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/boot/header.S 2007-10-23 13:51:29.000000000 +0800
> @@ -119,7 +119,7 @@
> # Part 2 of the header, from the old setup.S
>
> .ascii "HdrS" # header signature
> - .word 0x0207 # header version number (>= 0x0105)
> + .word 0x0208 # header version number (>= 0x0105)
> # or else old loadlin-1.5 will fail)
> .globl realmode_swtch
> realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
> @@ -219,6 +219,10 @@
>
> hardware_subarch_data: .quad 0
>
> +setup_data: .quad 0 # 64-bit physical pointer to
> + # single linked list of
> + # struct setup_data
> +
> # End of setup header #####################################################
>
> .section ".inittext", "ax"
> Index: linux-2.6/arch/x86/kernel/setup_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/setup_64.c 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/kernel/setup_64.c 2007-10-23 10:48:48.000000000 +0800
> @@ -247,6 +247,22 @@
> ebda_size = 64*1024;
> }
>
> +static void __init parse_setup_data(void)
> +{
> + struct setup_data *data;
> +
> + if (boot_params.hdr.version < 0x0207)
> + return;
> + for (data = __va(boot_params.hdr.setup_data);
> + data != __va(0);
> + data = __va(data->next)) {
> + switch (data->type) {
> + default:
> + break;
> + }
> + }
> +}
> +
> void __init setup_arch(char **cmdline_p)
> {
> printk(KERN_INFO "Command line: %s\n", boot_command_line);
> @@ -282,6 +298,8 @@
> strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
> *cmdline_p = command_line;
>
> + parse_setup_data();
> +
> parse_early_param();
>
> finish_e820_parsing();
> @@ -340,9 +358,9 @@
> reserve_bootmem_generic(table_start << PAGE_SHIFT,
> (table_end - table_start) << PAGE_SHIFT);
>
> - /* reserve kernel */
> + /* reserve kernel and setup data */
> reserve_bootmem_generic(__pa_symbol(&_text),
> - __pa_symbol(&_end) - __pa_symbol(&_text));
> + setup_data_end - __pa_symbol(&_text));
>
> /*
> * reserve physical page 0 - it's a special BIOS page on many boxes,
> Index: linux-2.6/arch/x86/kernel/setup_32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/setup_32.c 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/kernel/setup_32.c 2007-10-23 13:51:05.000000000 +0800
> @@ -66,6 +66,8 @@
> address, and must not be in the .bss segment! */
> unsigned long init_pg_tables_end __initdata = ~0UL;
>
> +unsigned long setup_data_len __initdata;
> +
> int disable_pse __devinitdata = 0;
>
> /*
> @@ -327,7 +329,7 @@
> * partially used pages are not usable - thus
> * we are rounding upwards:
> */
> - min_low_pfn = PFN_UP(init_pg_tables_end);
> + min_low_pfn = PFN_UP(init_pg_tables_end+setup_data_len);
>
> find_max_pfn();
>
> @@ -532,6 +534,22 @@
> return machine_specific_memory_setup();
> }
>
> +static void __init parse_setup_data(void)
> +{
> + struct setup_data *data;
> +
> + if (boot_params.hdr.version < 0x0207)
> + return;
> + for (data = __va(boot_params.hdr.setup_data);
> + data != __va(0);
> + data = __va(data->next)) {
> + switch (data->type) {
> + default:
> + break;
> + }
> + }
> +}
>
Please don't add new duplicated code. Put this into a common place.
> +
> /*
> * Determine if we were loaded by an EFI loader. If so, then we have also been
> * passed the efi memmap, systab, etc., so we should use these data structures
> @@ -579,6 +597,9 @@
> rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
> rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
> #endif
> +
> + parse_setup_data();
> +
> ARCH_SETUP
> if (efi_enabled)
> efi_init();
> @@ -594,7 +615,7 @@
> init_mm.start_code = (unsigned long) _text;
> init_mm.end_code = (unsigned long) _etext;
> init_mm.end_data = (unsigned long) _edata;
> - init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
> + init_mm.brk = init_pg_tables_end + setup_data_len + PAGE_OFFSET;
>
> code_resource.start = virt_to_phys(_text);
> code_resource.end = virt_to_phys(_etext)-1;
> Index: linux-2.6/arch/x86/kernel/e820_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/e820_64.c 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/kernel/e820_64.c 2007-10-23 10:01:39.000000000 +0800
> @@ -79,9 +79,9 @@
> }
> }
> #endif
> - /* kernel code */
> - if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
> - *addrp = PAGE_ALIGN(__pa_symbol(&_end));
> + /* kernel code and setup data */
> + if (last >= __pa_symbol(&_text) && addr < setup_data_end) {
> + *addrp = PAGE_ALIGN(setup_data_end);
> return 1;
> }
>
> Index: linux-2.6/arch/x86/kernel/head64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/head64.c 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/kernel/head64.c 2007-10-23 10:01:39.000000000 +0800
> @@ -19,6 +19,9 @@
> #include <asm/pgtable.h>
> #include <asm/tlbflush.h>
> #include <asm/sections.h>
> +#include <asm/io.h>
> +
> +unsigned long __initdata setup_data_end;
>
> static void __init zap_identity_mappings(void)
> {
> @@ -38,12 +41,35 @@
> static void __init copy_bootdata(char *real_mode_data)
> {
> char * command_line;
> + void *sdata_dst;
> + struct setup_data *sdata;
> + u64 *ppa_next;
> + unsigned long sdata_len;
>
> memcpy(&boot_params, real_mode_data, sizeof boot_params);
> if (boot_params.hdr.cmd_line_ptr) {
> command_line = __va(boot_params.hdr.cmd_line_ptr);
> memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
> }
> +
> +#define SETUP_DATA_ALIGN(addr) \
> + (((unsigned long)(addr)+(SETUP_DATA_ALIGN_SIZE-1))& \
> + ~(SETUP_DATA_ALIGN_SIZE-1))
> +
> + sdata_dst = __va(SETUP_DATA_ALIGN(__pa_symbol(&_end)));
> + ppa_next = &boot_params.hdr.setup_data;
> + while (*ppa_next) {
> + sdata = early_ioremap(*ppa_next, sizeof(*sdata));
> + sdata_len = sizeof(*sdata) + sdata->len;
> + early_iounmap(sdata, sizeof(*sdata));
> + sdata = early_ioremap(*ppa_next, sdata_len);
> + memcpy(sdata_dst, sdata, sdata_len);
> + early_iounmap(sdata, sdata_len);
> + *ppa_next = __pa(sdata_dst);
> + ppa_next = &((struct setup_data *)sdata_dst)->next;
> + sdata_dst = (void *)SETUP_DATA_ALIGN(sdata_dst+sdata_len);
> + }
> + setup_data_end = __pa(sdata_dst);
> }
>
> void __init x86_64_start_kernel(char * real_mode_data)
> Index: linux-2.6/include/asm-x86/e820_64.h
> ===================================================================
> --- linux-2.6.orig/include/asm-x86/e820_64.h 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/include/asm-x86/e820_64.h 2007-10-23 10:01:39.000000000 +0800
> @@ -56,6 +56,7 @@
>
> extern unsigned ebda_addr, ebda_size;
> extern unsigned long nodemap_addr, nodemap_size;
> +extern unsigned long setup_data_end;
> #endif/*!__ASSEMBLY__*/
>
> #endif/*__E820_HEADER*/
> Index: linux-2.6/arch/x86/kernel/head_32.S
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/head_32.S 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/kernel/head_32.S 2007-10-23 10:01:39.000000000 +0800
> @@ -135,6 +135,21 @@
> rep
> movsl
> 1:
> + movl boot_params - __PAGE_OFFSET + SETUP_DATA_POINTER,%ebp
>
You need to check the boot protocol version before looking at this pointer.
> + xorl %ecx,%ecx
> +2:
> + andl %ebp,%ebp
> + jz 1f
> + movl $(SETUP_DATA_HEADER_LEN+SETUP_DATA_ALIGN_SIZE-1),%eax
> + addl (SETUP_DATA_LEN)(%ebp),%eax
> + andl $~(SETUP_DATA_ALIGN_SIZE-1),%eax
> + addl %eax, %ecx
> + movl SETUP_DATA_NEXT(%ebp),%ebp
> + jmp 2b
> +1:
> + addl $(PAGE_SIZE_asm-1),%ecx
> + andl $~(PAGE_SIZE_asm-1),%ecx
> + movl %ecx,(setup_data_len - __PAGE_OFFSET)
>
> #ifdef CONFIG_PARAVIRT
> cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
> @@ -191,13 +206,33 @@
> stosl
> addl $0x1000,%eax
> loop 11b
> - /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
> + /* End condition: we must map up to and including INIT_MAP_BEYOND_END + setup_data_len */
> /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
> leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
> + addl (setup_data_len - __PAGE_OFFSET),%ebp
> cmpl %ebp,%eax
> jb 10b
> movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
>
> + /* Copy setup data */
> + leal (boot_params - __PAGE_OFFSET + SETUP_DATA_POINTER),%ebx
> +2:
> + movl (%ebx),%ebp
> + andl %ebp,%ebp
> + jz 1f
> + movl $SETUP_DATA_HEADER_LEN,%ecx
> + addl SETUP_DATA_LEN(%ebp),%ecx
> + addl $(SETUP_DATA_ALIGN_SIZE-1),%edi
> + andl $~(SETUP_DATA_ALIGN_SIZE-1),%edi
> + movl %edi,(%ebx)
> + movl %ebp,%esi
> + rep
> + movsb
> + movl (%ebx),%ebx
> + leal SETUP_DATA_NEXT(%ebx),%ebx
> + jmp 2b
> +1:
> +
> xorl %ebx,%ebx /* This is the boot CPU (BSP) */
> jmp 3f
> /*
> Index: linux-2.6/arch/x86/mm/discontig_32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/discontig_32.c 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/arch/x86/mm/discontig_32.c 2007-10-23 10:01:39.000000000 +0800
> @@ -282,7 +282,8 @@
> kva_pages = calculate_numa_remap_pages();
>
> /* partially used pages are not usable - thus round upwards */
> - system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
> + system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end +
> + setup_data_len);
>
> kva_start_pfn = find_max_low_pfn() - kva_pages;
>
> Index: linux-2.6/include/asm-x86/setup_32.h
> ===================================================================
> --- linux-2.6.orig/include/asm-x86/setup_32.h 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/include/asm-x86/setup_32.h 2007-10-23 10:01:39.000000000 +0800
> @@ -25,6 +25,13 @@
> #define OLD_CL_OFFSET 0x90022
> #define NEW_CL_POINTER 0x228 /* Relative to real mode data */
>
> +#define SETUP_DATA_POINTER 0x248 /* Relative to real mode data */
> +
> +#define SETUP_DATA_NEXT 0x0
> +#define SETUP_DATA_LEN 0xc
> +#define SETUP_DATA_HEADER_LEN 0x10
> +#define SETUP_DATA_ALIGN_SIZE 0x8
>
No, use asm-offsets(_32|_64).c
> +
> #ifndef __ASSEMBLY__
>
> #include <asm/bootparam.h>
> Index: linux-2.6/include/asm-x86/setup_64.h
> ===================================================================
> --- linux-2.6.orig/include/asm-x86/setup_64.h 2007-10-23 10:01:35.000000000 +0800
> +++ linux-2.6/include/asm-x86/setup_64.h 2007-10-23 10:03:25.000000000 +0800
> @@ -4,7 +4,9 @@
> #define COMMAND_LINE_SIZE 2048
>
> #ifdef __KERNEL__
> +#include <linux/const.h>
>
> +#define SETUP_DATA_ALIGN_SIZE __AC(0x8, UL)
> #ifndef __ASSEMBLY__
> #include <asm/bootparam.h>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 22:08 ` Jeremy Fitzhardinge
@ 2007-10-23 22:15 ` H. Peter Anvin
2007-10-23 22:20 ` Jeremy Fitzhardinge
2007-10-23 22:52 ` H. Peter Anvin
2007-10-23 23:18 ` Andi Kleen
2 siblings, 1 reply; 11+ messages in thread
From: H. Peter Anvin @ 2007-10-23 22:15 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Huang, Ying, Andi Kleen, Eric W. Biederman, akpm, Linus Torvalds,
linux-kernel
Jeremy Fitzhardinge wrote:
>> 1:
>> + movl boot_params - __PAGE_OFFSET + SETUP_DATA_POINTER,%ebp
>
> You need to check the boot protocol version before looking at this pointer.
>
No, he doesn't. The boot protocol version is communication between the
boot loader and the kernel (specifically, it is communication FROM the
kernel TO the bootloader), not between internal bits of the kernel. Th
Since the pointer is safely inside the setup template, even if the
bootloader doesn't understand boot protocol version 2.08 it will have
copied it.
So this is fine.
-hpa
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 22:15 ` H. Peter Anvin
@ 2007-10-23 22:20 ` Jeremy Fitzhardinge
2007-10-23 22:32 ` H. Peter Anvin
0 siblings, 1 reply; 11+ messages in thread
From: Jeremy Fitzhardinge @ 2007-10-23 22:20 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Huang, Ying, Andi Kleen, Eric W. Biederman, akpm, Linus Torvalds,
linux-kernel
H. Peter Anvin wrote:
> No, he doesn't. The boot protocol version is communication between
> the boot loader and the kernel (specifically, it is communication FROM
> the kernel TO the bootloader), not between internal bits of the
> kernel. Th
So the bootloader will only fill out fields the kernel claims to understand?
> Since the pointer is safely inside the setup template, even if the
> bootloader doesn't understand boot protocol version 2.08 it will have
> copied it.
OK.
J
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 22:20 ` Jeremy Fitzhardinge
@ 2007-10-23 22:32 ` H. Peter Anvin
0 siblings, 0 replies; 11+ messages in thread
From: H. Peter Anvin @ 2007-10-23 22:32 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Huang, Ying, Andi Kleen, Eric W. Biederman, akpm, Linus Torvalds,
linux-kernel
Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> No, he doesn't. The boot protocol version is communication between
>> the boot loader and the kernel (specifically, it is communication FROM
>> the kernel TO the bootloader), not between internal bits of the
>> kernel. Th
>
> So the bootloader will only fill out fields the kernel claims to understand?
>
Correct. If it fails to respect this restriction, it might even
overwrite kernel code. To make the bootloader's job easier, we have
retconned the jump instruction at the beginning of the header to also be
a limit marker.
-hpa
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 22:08 ` Jeremy Fitzhardinge
2007-10-23 22:15 ` H. Peter Anvin
@ 2007-10-23 22:52 ` H. Peter Anvin
2007-10-23 23:26 ` Jeremy Fitzhardinge
2007-10-23 23:18 ` Andi Kleen
2 siblings, 1 reply; 11+ messages in thread
From: H. Peter Anvin @ 2007-10-23 22:52 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Huang, Ying, Andi Kleen, Eric W. Biederman, akpm, Linus Torvalds,
linux-kernel
Jeremy Fitzhardinge wrote:
>
> As a general comment, I can't say I'm thrilled about sticking the copied
> setup data at the end of the initial pagetables. This is already a
> fairly complex area, and changing it touches a surprisingly large number
> of places. I wonder if there isn't a better way to deal with this
> (possibly by fixing up the generation of the initial pagetables in the
> process).
>
> Or more simply, define a max size for this data, and copy it into an
> initdata area (which, being initdata, can be quite large without wasting
> lots of memory).
>
The initdata solution is fairly sensible. It's easy, and this data
really isn't expected to grow very fast at all. Unfortunately we don't
*have* an initdata bss section at the moment, so a large buffer would
bloat the kernel binary -- at least the uncompressed one. One school of
thought says that this should be fixed :) [*]
However, appending to bss (like the pagetables already are) *should* be
a reasonable option; what really should be done there is instead of
replacing init_pg_tables_end with setup_data_end we should except
additional dynamic data items here, and have an initial_data_end
variable for the extreme end of any initial data no matter the source.
It still does touch a lot of places, but at least that way they will be
fixed once and for all. It's somewhat questionable if it isn't easier
for this particular job to just fix the initial bss issue.
Furthermore, on looking through the code again, I see a bunch of
"init_pg_tables_end + setup_data_len" which really is ugly.
>> +
>> +/* extensible setup data list node */
>> +struct setup_data {
>> + u64 next;
>> + u32 type;
>> + u32 len;
>> + u8 data[0];
>> +};
>>
>
> What are the alignment rules for this structure? Is it always 64-bit
> aligned? What about the relationship of len and data?
>
It's x86, so alignment is soft - it presumably *should* be 64-bit
aligned, but nothing break if the boot loader doesn't.
-hpa
[*] A very clean way to do that is to put said section right at the
beginning of the bss, and move __init_end after it:
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .; /* BSS */
*(.init.bss)
. = ALIGN(4096);
__init_end = .;
*(.bss.page_aligned)
*(.bss)
. = ALIGN(4);
__bss_stop = .;
_end = . ;
/* This is where the kernel creates the early boot page tables */
. = ALIGN(4096);
pg0 = . ;
}
On x86-64, this entails moving .data_nosave; it probably can be moved to
the same relative position as on i386 (although I have not verified that
that is true.)
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 22:08 ` Jeremy Fitzhardinge
2007-10-23 22:15 ` H. Peter Anvin
2007-10-23 22:52 ` H. Peter Anvin
@ 2007-10-23 23:18 ` Andi Kleen
2007-10-23 23:55 ` H. Peter Anvin
2 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2007-10-23 23:18 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Huang, Ying, H. Peter Anvin, Eric W. Biederman, akpm,
Linus Torvalds, linux-kernel
On Wednesday 24 October 2007 00:08:32 Jeremy Fitzhardinge wrote:
> Huang, Ying wrote:
> > This patch add a field of 64-bit physical pointer to NULL terminated
> > single linked list of struct setup_data to real-mode kernel
> > header. This is used as a more extensible boot parameters passing
> > mechanism.
> >
>
> As a general comment, I can't say I'm thrilled about sticking the copied
> setup data at the end of the initial pagetables. This is already a
> fairly complex area, and changing it touches a surprisingly large number
> of places. I wonder if there isn't a better way to deal with this
> (possibly by fixing up the generation of the initial pagetables in the
> process).
With the early reserve code in
ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-reserve
and
ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-alloc
this could be likely done cleaner.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 22:52 ` H. Peter Anvin
@ 2007-10-23 23:26 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 11+ messages in thread
From: Jeremy Fitzhardinge @ 2007-10-23 23:26 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Huang, Ying, Andi Kleen, Eric W. Biederman, akpm, Linus Torvalds,
linux-kernel
H. Peter Anvin wrote:
> Furthermore, on looking through the code again, I see a bunch of
> "init_pg_tables_end + setup_data_len" which really is ugly.
Yeah, that's what I'm objecting to.
>> What are the alignment rules for this structure? Is it always 64-bit
>> aligned? What about the relationship of len and data?
>>
>
> It's x86, so alignment is soft - it presumably *should* be 64-bit
> aligned, but nothing break if the boot loader doesn't.
This was more or less a rhetorical question - the code spends some
effort in rounding len up to some alignment, so its probably worth
documenting with the structure.
J
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 23:18 ` Andi Kleen
@ 2007-10-23 23:55 ` H. Peter Anvin
2007-10-24 3:08 ` Huang, Ying
2007-10-24 22:48 ` Andrew Morton
0 siblings, 2 replies; 11+ messages in thread
From: H. Peter Anvin @ 2007-10-23 23:55 UTC (permalink / raw)
To: Andi Kleen
Cc: Jeremy Fitzhardinge, Huang, Ying, Eric W. Biederman, akpm,
Linus Torvalds, linux-kernel
Andi Kleen wrote:
>
> With the early reserve code in
> ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-reserve
> and
> ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-alloc
> this could be likely done cleaner.
>
Indeed it could. For i386, the equivalent code would have another
significant benefit: reserving memory and then mapping and accessing it
later would (at least eventually) allow accesses > 4 GB on PAE kernels
(or with a PSE36 hack, on non-PAE kernels.)
-hpa
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 23:55 ` H. Peter Anvin
@ 2007-10-24 3:08 ` Huang, Ying
2007-10-24 22:48 ` Andrew Morton
1 sibling, 0 replies; 11+ messages in thread
From: Huang, Ying @ 2007-10-24 3:08 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Andi Kleen, Jeremy Fitzhardinge, Eric W. Biederman, akpm,
Linus Torvalds, linux-kernel
On Tue, 2007-10-23 at 16:55 -0700, H. Peter Anvin wrote:
> Andi Kleen wrote:
> >
> > With the early reserve code in
> > ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-reserve
> > and
> > ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-alloc
> > this could be likely done cleaner.
> >
>
> Indeed it could. For i386, the equivalent code would have another
> significant benefit: reserving memory and then mapping and accessing it
> later would (at least eventually) allow accesses > 4 GB on PAE kernels
> (or with a PSE36 hack, on non-PAE kernels.)
For i386, the bootmem allocator covers at most 0~796M memory area. So
some early reserve memory area can not be revered with bootmem allocator
later. Should we fix bootmem allocator firstly?
Best Regards,
Huang Ying
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH -v7 1/3] x86 boot: setup data
2007-10-23 23:55 ` H. Peter Anvin
2007-10-24 3:08 ` Huang, Ying
@ 2007-10-24 22:48 ` Andrew Morton
1 sibling, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2007-10-24 22:48 UTC (permalink / raw)
To: H. Peter Anvin; +Cc: ak, jeremy, ying.huang, ebiederm, torvalds, linux-kernel
On Tue, 23 Oct 2007 16:55:13 -0700
"H. Peter Anvin" <hpa@zytor.com> wrote:
> Andi Kleen wrote:
> >
> > With the early reserve code in
> > ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-reserve
> > and
> > ftp://firstfloor.org/pub/ak/x86_64/quilt/patches/early-alloc
> > this could be likely done cleaner.
> >
>
> Indeed it could. For i386, the equivalent code would have another
> significant benefit: reserving memory and then mapping and accessing it
> later would (at least eventually) allow accesses > 4 GB on PAE kernels
> (or with a PSE36 hack, on non-PAE kernels.)
>
Cleaner sounds good. I'll duck these patches for now.
This one doesn't apply, btw: setup_32.h disappeared.
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2007-10-24 22:53 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-23 8:06 [PATCH -v7 1/3] x86 boot: setup data Huang, Ying
2007-10-23 22:08 ` Jeremy Fitzhardinge
2007-10-23 22:15 ` H. Peter Anvin
2007-10-23 22:20 ` Jeremy Fitzhardinge
2007-10-23 22:32 ` H. Peter Anvin
2007-10-23 22:52 ` H. Peter Anvin
2007-10-23 23:26 ` Jeremy Fitzhardinge
2007-10-23 23:18 ` Andi Kleen
2007-10-23 23:55 ` H. Peter Anvin
2007-10-24 3:08 ` Huang, Ying
2007-10-24 22:48 ` Andrew Morton
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).