linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fix broken x86 SMP boot sequence
@ 2006-02-22 17:48 James Bottomley
  2006-02-22 18:08 ` Zachary Amsden
  0 siblings, 1 reply; 4+ messages in thread
From: James Bottomley @ 2006-02-22 17:48 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: Zachary Amsden, linux-kernel

This patch:

[PATCH] x86: GDT alignment fix

completely broke my voyager boot sequence.  The reason is that the patch
introduces a subtle dependence on the assumption that the boot CPU is
CPU0, which is untrue for voyager.  I think the most correct fix for
this is to use a per_cpu for cpu_gdt_descr and still allocate the actual
descriptors as page allocations.  I think it's also much more efficient
to do these allocations in cpu_init() where they'll be automatic for all
subarchitectures, rather than placing them in each do_boot_cpu
implementation.  Note, I've also fixed a potentially hard to trace bug
as well: the secondaries in the prior scheme were actually using the
Boot CPU's GDT to come up with ... whatever cpu_init() and subsequent
manipulations alter it to be.  This implementation goes back to using
the special (and now not altered) cpu_gdt_table for all booting CPUs.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

---

James

diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index cbc3206..d561a56 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -4,6 +4,7 @@
 #include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/bootmem.h>
 #include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
@@ -18,6 +19,9 @@
 
 #include "cpu.h"
 
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
 
@@ -559,8 +563,9 @@ void __devinit cpu_init(void)
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt;
 	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -577,6 +582,22 @@ void __devinit cpu_init(void)
 		set_in_cr4(X86_CR4_TSD);
 	}
 
+	/* This is a horrible hack to allocate the GDT.  The problem
+	 * is that cpu_init() is called really early for the boot CPU
+	 * (and hence needs bootmem) but much later for the secondary
+	 * CPUs, when bootmem will have gone away */
+	if (NODE_DATA(0)->bdata->node_bootmem_map) {
+		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+		/* alloc_bootmem_pages panics on failure, so no check */
+		memset(gdt, 0, PAGE_SIZE);
+	} else {
+		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+		if (unlikely(!gdt)) {
+			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+			for (;;) local_irq_enable();
+		}
+	}
+
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
@@ -589,10 +610,10 @@ void __devinit cpu_init(void)
 		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
 		(CPU_16BIT_STACK_SIZE - 1);
 
-	cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
- 	cpu_gdt_descr[cpu].address = (unsigned long)gdt;
+	cpu_gdt_descr->size = GDT_SIZE - 1;
+ 	cpu_gdt_descr->address = (unsigned long)gdt;
 
-	load_gdt(&cpu_gdt_descr[cpu]);
+	load_gdt(cpu_gdt_descr);
 	load_idt(&idt_descr);
 
 	/*
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 870f20b..e437fb3 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -525,5 +525,3 @@ ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* 0xf0 - unused */
 	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
 
-	/* Be sure this is zeroed to avoid false validations in Xen */
-	.fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index b3c2e2c..9ed449a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -903,12 +903,6 @@ static int __devinit do_boot_cpu(int api
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	if (!cpu_gdt_descr[cpu].address &&
-	    !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
-		printk("Failed to allocate GDT for CPU %d\n", cpu);
-		return 1;
-	}
-
 	++cpucount;
 
 	/*
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h
index 494e73b..89b9c32 100644
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -25,10 +25,12 @@ struct Xgt_desc_struct {
 } __attribute__ ((packed));
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
+DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+
 
 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 {
-	return ((struct desc_struct *)cpu_gdt_descr[cpu].address);
+	return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
 }
 
 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] fix broken x86 SMP boot sequence
  2006-02-22 17:48 [PATCH] fix broken x86 SMP boot sequence James Bottomley
@ 2006-02-22 18:08 ` Zachary Amsden
  2006-02-22 19:06   ` James Bottomley
  2006-02-22 23:49   ` James Bottomley
  0 siblings, 2 replies; 4+ messages in thread
From: Zachary Amsden @ 2006-02-22 18:08 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

James Bottomley wrote:

>This patch:
>  
>

I'm not adverse to making the gdt descriptors part of the per-cpu data.  
But I think your patch is missing some necessary changes to 
include/asm-i386/desc.h - what do you plan to do with the following 
definition there?

extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];


Also, it seems likely you may have broken APM and/or PnP BIOS.

>diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
>index cbc3206..d561a56 100644
>--- a/arch/i386/kernel/cpu/common.c
>+++ b/arch/i386/kernel/cpu/common.c
>@@ -4,6 +4,7 @@
> #include <linux/smp.h>
> #include <linux/module.h>
> #include <linux/percpu.h>
>+#include <linux/bootmem.h>
> #include <asm/semaphore.h>
> #include <asm/processor.h>
> #include <asm/i387.h>
>@@ -18,6 +19,9 @@
> 
> #include "cpu.h"
> 
>+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
>+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
>+
> DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
> EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
> 
>@@ -559,8 +563,9 @@ void __devinit cpu_init(void)
> 	int cpu = smp_processor_id();
> 	struct tss_struct * t = &per_cpu(init_tss, cpu);
> 	struct thread_struct *thread = &current->thread;
>-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
>+	struct desc_struct *gdt;
> 	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
>+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
> 
> 	if (cpu_test_and_set(cpu, cpu_initialized)) {
> 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
>@@ -577,6 +582,22 @@ void __devinit cpu_init(void)
> 		set_in_cr4(X86_CR4_TSD);
> 	}
> 
>+	/* This is a horrible hack to allocate the GDT.  The problem
>+	 * is that cpu_init() is called really early for the boot CPU
>+	 * (and hence needs bootmem) but much later for the secondary
>+	 * CPUs, when bootmem will have gone away */
>+	if (NODE_DATA(0)->bdata->node_bootmem_map) {
>+		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
>+		/* alloc_bootmem_pages panics on failure, so no check */
>+		memset(gdt, 0, PAGE_SIZE);
>+	} else {
>+		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
>+		if (unlikely(!gdt)) {
>+			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
>+			for (;;) local_irq_enable();
>+		}
>+	}
>+
> 	/*
> 	 * Initialize the per-CPU GDT with the boot GDT,
> 	 * and set up the GDT descriptor:
>  
>


Can't you get rid of this ugly hack _and_ optimize the code at the same 
time?  I don't understand the details of voyager bringup, but it seems 
clear that the BSP or node 0 is special in both sub-architectures.  If 
it is special anyway, the conditional logic can be merged, and better 
yet, the allocation for the first caller of cpu_init() can skip the 
allocation entirely - it can simply use the boot GDT, which is already 
page-aligned and ready to go.  This gets rid of the 
alloc_bootmem_pages() piece of the hack above.

Also, it seems you left the allocation of the GDT in do_boot_cpu().  How 
do you plan to make sure that GDT allocation is compatible with hotplug 
CPU startup?  I was worried about that, which is why I moved the GDT 
allocation for secondary processors (originally in 
arch/i386/kernel/cpu/common.c) to smpboot.c.

Zach

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] fix broken x86 SMP boot sequence
  2006-02-22 18:08 ` Zachary Amsden
@ 2006-02-22 19:06   ` James Bottomley
  2006-02-22 23:49   ` James Bottomley
  1 sibling, 0 replies; 4+ messages in thread
From: James Bottomley @ 2006-02-22 19:06 UTC (permalink / raw)
  To: Zachary Amsden; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

On Wed, 2006-02-22 at 10:08 -0800, Zachary Amsden wrote:
> I'm not adverse to making the gdt descriptors part of the per-cpu data.  

If you can think of another way to fix the bug, I'm open to it.

> But I think your patch is missing some necessary changes to 
> include/asm-i386/desc.h - what do you plan to do with the following 
> definition there?
> 
> extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];

Nothing ... it predates your patch ... it's not much use, but just in
case someone wants to get at the boot cpu gdt descriptors.

> Also, it seems likely you may have broken APM and/or PnP BIOS.

I don't think so, how? ... APM alters the GDT for the APM bios long
after cpu_init() is called, as does drivers/pnp/pnpbios.

> Can't you get rid of this ugly hack _and_ optimize the code at the same 
> time?  I don't understand the details of voyager bringup, but it seems 
> clear that the BSP or node 0 is special in both sub-architectures.  If 
> it is special anyway, the conditional logic can be merged, and better 
> yet, the allocation for the first caller of cpu_init() can skip the 
> allocation entirely - it can simply use the boot GDT, which is already 
> page-aligned and ready to go.  This gets rid of the 
> alloc_bootmem_pages() piece of the hack above.

Not without repeating your bug.  In the original code the gdt runs in
the boot area until cpu_init() where it switches to the original per_cpu
for the descriptor.  Your patch moved the boot CPU to using the boot
area after cpu_init(), which means subsequent boot cpu gdt changes alter
that area before the secondaries have come up (also using it).

If we have to move to individual pages for descriptors, then every CPU
needs one.

> Also, it seems you left the allocation of the GDT in do_boot_cpu().  How 
> do you plan to make sure that GDT allocation is compatible with hotplug 
> CPU startup?  I was worried about that, which is why I moved the GDT 
> allocation for secondary processors (originally in 
> arch/i386/kernel/cpu/common.c) to smpboot.c.

No, this piece of the patch:

diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index b3c2e2c..9ed449a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -903,12 +903,6 @@ static int __devinit do_boot_cpu(int api
        unsigned long start_eip;
        unsigned short nmi_high = 0, nmi_low = 0;
 
-       if (!cpu_gdt_descr[cpu].address &&
-           !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL)))
{
-               printk("Failed to allocate GDT for CPU %d\n", cpu);
-               return 1;
-       }
-
        ++cpucount;
 
        /*

removes it.

Since the allocation is in cpu_init(), it will be called on hotplug as
well.

James



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] fix broken x86 SMP boot sequence
  2006-02-22 18:08 ` Zachary Amsden
  2006-02-22 19:06   ` James Bottomley
@ 2006-02-22 23:49   ` James Bottomley
  1 sibling, 0 replies; 4+ messages in thread
From: James Bottomley @ 2006-02-22 23:49 UTC (permalink / raw)
  To: Zachary Amsden; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

On Wed, 2006-02-22 at 10:08 -0800, Zachary Amsden wrote:
> I'm not adverse to making the gdt descriptors part of the per-cpu data.  
> But I think your patch is missing some necessary changes to 
> include/asm-i386/desc.h - what do you plan to do with the following 
> definition there?
> 
> extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];

Actually, I checked ... it looks like the externel definition of
cpu_gdt_descr can be dumped ... nothing's using it (well, at least in a
voyager build), so I updated the patch.

James

---

diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index cbc3206..d561a56 100644
Index: BUILD-2.6/arch/i386/kernel/cpu/common.c
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/cpu/common.c	2006-02-22 13:53:43.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/cpu/common.c	2006-02-22 15:28:37.000000000 -0600
@@ -4,6 +4,7 @@
 #include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/bootmem.h>
 #include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
@@ -18,6 +19,9 @@
 
 #include "cpu.h"
 
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
 
@@ -571,8 +575,9 @@
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt;
 	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -589,6 +594,22 @@
 		set_in_cr4(X86_CR4_TSD);
 	}
 
+	/* This is a horrible hack to allocate the GDT.  The problem
+	 * is that cpu_init() is called really early for the boot CPU
+	 * (and hence needs bootmem) but much later for the secondary
+	 * CPUs, when bootmem will have gone away */
+	if (NODE_DATA(0)->bdata->node_bootmem_map) {
+		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+		/* alloc_bootmem_pages panics on failure, so no check */
+		memset(gdt, 0, PAGE_SIZE);
+	} else {
+		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+		if (unlikely(!gdt)) {
+			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+			for (;;) local_irq_enable();
+		}
+	}
+
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
@@ -601,10 +622,10 @@
 		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
 		(CPU_16BIT_STACK_SIZE - 1);
 
-	cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
- 	cpu_gdt_descr[cpu].address = (unsigned long)gdt;
+	cpu_gdt_descr->size = GDT_SIZE - 1;
+ 	cpu_gdt_descr->address = (unsigned long)gdt;
 
-	load_gdt(&cpu_gdt_descr[cpu]);
+	load_gdt(cpu_gdt_descr);
 	load_idt(&idt_descr);
 
 	/*
Index: BUILD-2.6/arch/i386/kernel/head.S
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/head.S	2006-02-22 13:53:43.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/head.S	2006-02-22 15:28:37.000000000 -0600
@@ -534,5 +534,3 @@
 	.quad 0x0000000000000000	/* 0xf0 - unused */
 	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
 
-	/* Be sure this is zeroed to avoid false validations in Xen */
-	.fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0
Index: BUILD-2.6/arch/i386/kernel/smpboot.c
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/smpboot.c	2006-02-22 13:53:43.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/smpboot.c	2006-02-22 15:28:37.000000000 -0600
@@ -898,12 +898,6 @@
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	if (!cpu_gdt_descr[cpu].address &&
-	    !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
-		printk("Failed to allocate GDT for CPU %d\n", cpu);
-		return 1;
-	}
-
 	++cpucount;
 
 	/*
Index: BUILD-2.6/include/asm-i386/desc.h
===================================================================
--- BUILD-2.6.orig/include/asm-i386/desc.h	2006-02-22 13:53:47.000000000 -0600
+++ BUILD-2.6/include/asm-i386/desc.h	2006-02-22 15:28:37.000000000 -0600
@@ -24,11 +24,13 @@
 	unsigned short pad;
 } __attribute__ ((packed));
 
-extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
+extern struct Xgt_desc_struct idt_descr;
+DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+
 
 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 {
-	return ((struct desc_struct *)cpu_gdt_descr[cpu].address);
+	return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
 }
 
 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
Index: BUILD-2.6/arch/i386/kernel/i386_ksyms.c
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/i386_ksyms.c	2006-02-22 15:26:57.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/i386_ksyms.c	2006-02-22 15:28:54.000000000 -0600
@@ -3,8 +3,6 @@
 #include <asm/checksum.h>
 #include <asm/desc.h>
 
-EXPORT_SYMBOL_GPL(cpu_gdt_descr);
-
 EXPORT_SYMBOL(__down_failed);
 EXPORT_SYMBOL(__down_failed_interruptible);
 EXPORT_SYMBOL(__down_failed_trylock);



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2006-02-22 23:51 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-22 17:48 [PATCH] fix broken x86 SMP boot sequence James Bottomley
2006-02-22 18:08 ` Zachary Amsden
2006-02-22 19:06   ` James Bottomley
2006-02-22 23:49   ` James Bottomley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).