linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] trim memory not covered by WB MTRRs
@ 2007-06-06 19:29 Jesse Barnes
  2007-06-06 20:26 ` Justin Piszcz
                   ` (8 more replies)
  0 siblings, 9 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 19:29 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: Justin Piszcz, Eric W. Biederman

On some machines, buggy BIOSes don't properly setup WB MTRRs to
cover all available RAM, meaning the last few megs (or even gigs)
of memory will be marked uncached.  Since Linux tends to allocate
from high memory addresses first, this causes the machine to be
unusably slow as soon as the kernel starts really using memory
(i.e. right around init time).

This patch works around the problem by scanning the MTRRs at
boot and figuring out whether the current end_pfn value (setup
by early e820 code) goes beyond the highest WB MTRR range, and
if so, trimming it to match.  A fairly obnoxious KERN_WARNING
is printed too, letting the user know that not all of their
memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot
ordering would be slightly different, since the MTRR code on i386
depends on the boot_cpu_data structure being setup.

Justin, can you please test and make sure this patch works for
you too?  It'll only work around the problem, but it's better
than having to do mem= by hand or waiting for a fix from your
BIOS vendor.

Thanks,
Jesse

Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>

diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index c4ebb51..71fc768 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -13,7 +13,7 @@
 #include "mtrr.h"
 
 struct mtrr_state {
-	struct mtrr_var_range *var_ranges;
+	struct mtrr_var_range var_ranges[NUM_VAR_RANGES];
 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
 	unsigned char enabled;
 	unsigned char have_fixed;
@@ -84,12 +84,6 @@ void get_mtrr_state(void)
 	struct mtrr_var_range *vrs;
 	unsigned lo, dummy;
 
-	if (!mtrr_state.var_ranges) {
-		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
-						GFP_KERNEL);
-		if (!mtrr_state.var_ranges)
-			return;
-	} 
 	vrs = mtrr_state.var_ranges;
 
 	rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index c7d8f17..d7922ce 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -12,7 +12,7 @@
 #include "mtrr.h"
 
 /* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
+extern unsigned int usage_table[];
 
 
 #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 1cf466d..c133856 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 
+#include <asm/e820.h>
 #include <asm/mtrr.h>
-
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -47,7 +47,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int *usage_table;
+unsigned int usage_table[NUM_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -121,11 +121,6 @@ static void __init init_table(void)
 	int i, max;
 
 	max = num_var_ranges;
-	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
-	    == NULL) {
-		printk(KERN_ERR "mtrr: could not allocate\n");
-		return;
-	}
 	for (i = 0; i < max; i++)
 		usage_table[i] = 1;
 }
@@ -589,16 +584,11 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[NUM_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
 	int i;
-	int size = num_var_ranges * sizeof(struct mtrr_value);
-
-	mtrr_state = kzalloc(size,GFP_ATOMIC);
-	if (!mtrr_state)
-		return -ENOMEM;
 
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i,
@@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
 				 mtrr_state[i].lsize,
 				 mtrr_state[i].ltype);
 	}
-	kfree(mtrr_state);
 	return 0;
 }
 
@@ -631,6 +620,42 @@ static struct sysdev_driver mtrr_sysdev_driver = {
 	.resume		= mtrr_restore,
 };
 
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations.  This routine checks to make sure the MTRRs having
+ * a write back type cover all of the memory the kernel is intending to use.
+ * If not, it'll trim any memory off the end by adjusting end_pfn, removing
+ * it from the kernel's allocation pools, warning the user with an obnoxious
+ * message.
+ */
+void __init mtrr_trim_uncached_memory(void)
+{
+	unsigned long i, base, size, highest_addr = 0;
+	mtrr_type type;
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base <<= PAGE_SHIFT;
+		size <<= PAGE_SHIFT;
+		if (highest_addr < base + size)
+			highest_addr = base + size;
+	}
+
+	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
+		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
+		printk(KERN_WARNING "**** MTRRs don't cover all of "
+		       "memory, trimmed %ld pages\n", end_pfn -
+		       (highest_addr >> PAGE_SHIFT));
+		printk(KERN_WARNING "***************\n");
+		end_pfn = highest_addr >> PAGE_SHIFT;
+	}
+}
 
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index 289dfe6..a29dcba 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -14,6 +14,7 @@
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
 #define NUM_FIXED_RANGES 88
+#define NUM_VAR_RANGES 256
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
index c3c6b91..c138eac 100644
--- a/arch/x86_64/kernel/bugs.c
+++ b/arch/x86_64/kernel/bugs.c
@@ -14,7 +14,6 @@
 void __init check_bugs(void)
 {
 	identify_cpu(&boot_cpu_data);
-	mtrr_bp_init();
 #if !defined(CONFIG_SMP)
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index eb6524f..409b63c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
 	 * we are rounding upwards:
 	 */
 	end_pfn = e820_end_of_ram();
+	/* Trim memory not covered by WB MTRRs */
+	mtrr_bp_init();
+	mtrr_trim_uncached_memory();
+
 	num_physpages = end_pfn;
 
 	check_efer();
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
index b557c48..cc62bd8 100644
--- a/include/asm-x86_64/mtrr.h
+++ b/include/asm-x86_64/mtrr.h
@@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
 		     unsigned int type, char increment);
 extern int mtrr_del (int reg, unsigned long base, unsigned long size);
 extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_trim_uncached_memory(void);
 #  else
 static __inline__ int mtrr_add (unsigned long base, unsigned long size,
 				unsigned int type, char increment)

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
@ 2007-06-06 20:26 ` Justin Piszcz
  2007-06-06 20:28   ` Jesse Barnes
  2007-06-06 21:53 ` Justin Piszcz
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 20:26 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> Justin, can you please test and make sure this patch works for
> you too?  It'll only work around the problem, but it's better
> than having to do mem= by hand or waiting for a fix from your
> BIOS vendor.
>
> Thanks,
> Jesse

Against what kernel version does this patch apply?

linux-2.6.21# patch -p1 < ../mtrr.patch
patching file arch/i386/kernel/cpu/mtrr/generic.c
Hunk #2 succeeded at 66 (offset -18 lines).
patching file arch/i386/kernel/cpu/mtrr/if.c
patching file arch/i386/kernel/cpu/mtrr/main.c
patching file arch/i386/kernel/cpu/mtrr/mtrr.h
can't find file to patch at input line 160
Perhaps you used the wrong -p or --strip option?
The text leading up to this was:
--------------------------
|diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
|index c3c6b91..c138eac 100644
|--- a/arch/x86_64/kernel/bugs.c
|+++ b/arch/x86_64/kernel/bugs.c
--------------------------
File to patch:


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 20:26 ` Justin Piszcz
@ 2007-06-06 20:28   ` Jesse Barnes
  2007-06-06 20:31     ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 20:28 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 1:26 pm Justin Piszcz wrote:
> On Wed, 6 Jun 2007, Jesse Barnes wrote:
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached.  Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > This patch works around the problem by scanning the MTRRs at
> > boot and figuring out whether the current end_pfn value (setup
> > by early e820 code) goes beyond the highest WB MTRR range, and
> > if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> > is printed too, letting the user know that not all of their
> > memory is available due to a likely BIOS bug.
> >
> > Something similar could be done on i386 if needed, but the boot
> > ordering would be slightly different, since the MTRR code on i386
> > depends on the boot_cpu_data structure being setup.
> >
> > Justin, can you please test and make sure this patch works for
> > you too?  It'll only work around the problem, but it's better
> > than having to do mem= by hand or waiting for a fix from your
> > BIOS vendor.
> >
> > Thanks,
> > Jesse
>
> Against what kernel version does this patch apply?

Um... git as of b4946ffb1860597b187d78d61ac6504177eb0ff8.  Sorry I 
should have updated before spinning the patch (will do now).

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 20:28   ` Jesse Barnes
@ 2007-06-06 20:31     ` Jesse Barnes
  2007-06-06 20:37       ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 20:31 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 1:28 pm Jesse Barnes wrote:
> > Against what kernel version does this patch apply?
>
> Um... git as of b4946ffb1860597b187d78d61ac6504177eb0ff8.  Sorry I
> should have updated before spinning the patch (will do now).

Appears to apply cleanly to git head as of a minute ago too.

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 20:31     ` Jesse Barnes
@ 2007-06-06 20:37       ` Justin Piszcz
  2007-06-06 20:50         ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 20:37 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 1:28 pm Jesse Barnes wrote:
>>> Against what kernel version does this patch apply?
>>
>> Um... git as of b4946ffb1860597b187d78d61ac6504177eb0ff8.  Sorry I
>> should have updated before spinning the patch (will do now).
>
> Appears to apply cleanly to git head as of a minute ago too.
>
> Jesse
>

Can you produce a patch against 2.6.22-rc4 or 2.6.21 so I and other people 
can easily try it? I do not have git installed on this machine, thanks.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 20:37       ` Justin Piszcz
@ 2007-06-06 20:50         ` Jesse Barnes
  2007-06-06 21:26           ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 20:50 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 1:37 pm Justin Piszcz wrote:
> On Wed, 6 Jun 2007, Jesse Barnes wrote:
> > On Wednesday, June 6, 2007 1:28 pm Jesse Barnes wrote:
> >>> Against what kernel version does this patch apply?
> >>
> >> Um... git as of b4946ffb1860597b187d78d61ac6504177eb0ff8.  Sorry I
> >> should have updated before spinning the patch (will do now).
> >
> > Appears to apply cleanly to git head as of a minute ago too.
> >
> > Jesse
>
> Can you produce a patch against 2.6.22-rc4 or 2.6.21 so I and other
> people can easily try it? I do not have git installed on this
> machine, thanks.

Seems to apply cleanly to 2.6.22-rc4 too.  Haven't tested that though.

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 20:50         ` Jesse Barnes
@ 2007-06-06 21:26           ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 21:26 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

Will give it a shot.

On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 1:37 pm Justin Piszcz wrote:
>> On Wed, 6 Jun 2007, Jesse Barnes wrote:
>>> On Wednesday, June 6, 2007 1:28 pm Jesse Barnes wrote:
>>>>> Against what kernel version does this patch apply?
>>>>
>>>> Um... git as of b4946ffb1860597b187d78d61ac6504177eb0ff8.  Sorry I
>>>> should have updated before spinning the patch (will do now).
>>>
>>> Appears to apply cleanly to git head as of a minute ago too.
>>>
>>> Jesse
>>
>> Can you produce a patch against 2.6.22-rc4 or 2.6.21 so I and other
>> people can easily try it? I do not have git installed on this
>> machine, thanks.
>
> Seems to apply cleanly to 2.6.22-rc4 too.  Haven't tested that though.
>
> Jesse
>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
  2007-06-06 20:26 ` Justin Piszcz
@ 2007-06-06 21:53 ` Justin Piszcz
  2007-06-06 22:03   ` Justin Piszcz
  2007-06-07  7:45 ` Eric W. Biederman
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 21:53 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> Justin, can you please test and make sure this patch works for
> you too?  It'll only work around the problem, but it's better
> than having to do mem= by hand or waiting for a fix from your
> BIOS vendor.
>
> Thanks,
> Jesse

Jesse, it worked.

With mem=8832M (without your patch): 2.6.22-rc4:

top - 17:39:02 up 1 day,  8:07, 25 users,  load average: 2.33, 0.76, 0.30
Tasks: 325 total,  11 running, 314 sleeping,   0 stopped,   0 zombie
Cpu(s): 80.0%us, 20.0%sy,  0.0%ni,  0.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   8039620k total,  7936472k used,   103148k free,      708k buffers
Swap: 16787768k total,      128k used, 16787640k free,  6646248k cached

With no mem= in append line (with your patch): 2.6.22-rc4:

top - 17:44:01 up 1 min,  1 user,  load average: 0.97, 0.25, 0.08
Tasks: 145 total,   1 running, 144 sleeping,   0 stopped,   0 zombie
Cpu(s):  5.2%us,  3.0%sy,  1.2%ni, 86.8%id,  3.8%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   8039608k total,   969380k used,  7070228k free,     1232k buffers
Swap: 16787768k total,        0k used, 16787768k free,   109448k cached

Odd, remote netconsole did not capture the dmesg the E820 memory map.

Jun  6 17:43:03 p34 [   53.598611] ahci 0000:00:1f.2: AHCI 0001.0100 32 slots 6 ports 3 Gbps 0x3f impl SATA mode 
Jun  6 17:43:03 p34 [   53.598683] ahci 0000:00:1f.2: flags: 64bit ncq led clo pio slum part 
Jun  6 17:43:03 p34 [   53.598986] scsi0 : ahci 
Jun  6 17:43:03 p34 [   53.599131] scsi1 : ahci 
Jun  6 17:43:03 p34 [   53.599239] scsi2 : ahci 
Jun  6 17:43:03 p34 [   53.599340] scsi3 : ahci 
Jun  6 17:43:03 p34 [   53.599438] scsi4 : ahci

I will run with this patch for a while but so far, no issues, everything 
looks great.

Will it make it into 2.6.22-rc5? :)

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 21:53 ` Justin Piszcz
@ 2007-06-06 22:03   ` Justin Piszcz
  2007-06-06 22:05     ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:03 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

Mem:   8039620k total,  7936472k used,   103148k free,      708k buffers
Mem:   8039608k total,   969380k used,  7070228k free,     1232k buffers

I am curious, why does the patch != the mem=8832M?

Justin.

On Wed, 6 Jun 2007, Justin Piszcz wrote:

>
>
> On Wed, 6 Jun 2007, Jesse Barnes wrote:
>
>> On some machines, buggy BIOSes don't properly setup WB MTRRs to
>> cover all available RAM, meaning the last few megs (or even gigs)
>> of memory will be marked uncached.  Since Linux tends to allocate
>> from high memory addresses first, this causes the machine to be
>> unusably slow as soon as the kernel starts really using memory
>> (i.e. right around init time).
>> 
>> This patch works around the problem by scanning the MTRRs at
>> boot and figuring out whether the current end_pfn value (setup
>> by early e820 code) goes beyond the highest WB MTRR range, and
>> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
>> is printed too, letting the user know that not all of their
>> memory is available due to a likely BIOS bug.
>> 
>> Something similar could be done on i386 if needed, but the boot
>> ordering would be slightly different, since the MTRR code on i386
>> depends on the boot_cpu_data structure being setup.
>> 
>> Justin, can you please test and make sure this patch works for
>> you too?  It'll only work around the problem, but it's better
>> than having to do mem= by hand or waiting for a fix from your
>> BIOS vendor.
>> 
>> Thanks,
>> Jesse
>
> Jesse, it worked.
>
> With mem=8832M (without your patch): 2.6.22-rc4:
>
> top - 17:39:02 up 1 day,  8:07, 25 users,  load average: 2.33, 0.76, 0.30
> Tasks: 325 total,  11 running, 314 sleeping,   0 stopped,   0 zombie
> Cpu(s): 80.0%us, 20.0%sy,  0.0%ni,  0.0%id,  0.0%wa,  0.0%hi,  0.0%si, 
> 0.0%st
> Mem:   8039620k total,  7936472k used,   103148k free,      708k buffers
> Swap: 16787768k total,      128k used, 16787640k free,  6646248k cached
>
> With no mem= in append line (with your patch): 2.6.22-rc4:
>
> top - 17:44:01 up 1 min,  1 user,  load average: 0.97, 0.25, 0.08
> Tasks: 145 total,   1 running, 144 sleeping,   0 stopped,   0 zombie
> Cpu(s):  5.2%us,  3.0%sy,  1.2%ni, 86.8%id,  3.8%wa,  0.0%hi,  0.0%si, 
> 0.0%st
> Mem:   8039608k total,   969380k used,  7070228k free,     1232k buffers
> Swap: 16787768k total,        0k used, 16787768k free,   109448k cached
>
> Odd, remote netconsole did not capture the dmesg the E820 memory map.
>
> Jun  6 17:43:03 p34 [   53.598611] ahci 0000:00:1f.2: AHCI 0001.0100 32 slots 
> 6 ports 3 Gbps 0x3f impl SATA mode Jun  6 17:43:03 p34 [   53.598683] ahci 
> 0000:00:1f.2: flags: 64bit ncq led clo pio slum part Jun  6 17:43:03 p34 [ 
> 53.598986] scsi0 : ahci Jun  6 17:43:03 p34 [   53.599131] scsi1 : ahci Jun 
> 6 17:43:03 p34 [   53.599239] scsi2 : ahci Jun  6 17:43:03 p34 [   53.599340] 
> scsi3 : ahci Jun  6 17:43:03 p34 [   53.599438] scsi4 : ahci
>
> I will run with this patch for a while but so far, no issues, everything 
> looks great.
>
> Will it make it into 2.6.22-rc5? :)
>
> Justin.
>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:03   ` Justin Piszcz
@ 2007-06-06 22:05     ` Jesse Barnes
  2007-06-06 22:07       ` Justin Piszcz
  2007-06-06 22:13       ` Justin Piszcz
  0 siblings, 2 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 22:05 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 3:03 pm Justin Piszcz wrote:
> Mem:   8039620k total,  7936472k used,   103148k free,      708k
> buffers Mem:   8039608k total,   969380k used,  7070228k free,    
> 1232k buffers
>
> I am curious, why does the patch != the mem=8832M?

I'm not sure... can you post your e820 map from boot and the contents 
of /proc/mtrr?  Maybe my patch is trimming off a few too many pages, or 
maybe 8832M isn't quite right and actually ends up leaving you with a 
few uncached pages.

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:05     ` Jesse Barnes
@ 2007-06-06 22:07       ` Justin Piszcz
  2007-06-06 22:13       ` Justin Piszcz
  1 sibling, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:07 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:03 pm Justin Piszcz wrote:
>> Mem:   8039620k total,  7936472k used,   103148k free,      708k
>> buffers Mem:   8039608k total,   969380k used,  7070228k free,
>> 1232k buffers
>>
>> I am curious, why does the patch != the mem=8832M?
>
> I'm not sure... can you post your e820 map from boot and the contents
> of /proc/mtrr?  Maybe my patch is trimming off a few too many pages, or
> maybe 8832M isn't quite right and actually ends up leaving you with a
> few uncached pages.
>
> Jesse
>

I cannot post the E820 memory map, I have no way to capture it, I cannot 
get anything from netconsole and dmesg does not show it either.

BEFORE:
reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1
reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1
reg03: base=0xcf800000 (3320MB), size=   8MB: uncachable, count=1
reg04: base=0xcf700000 (3319MB), size=   1MB: uncachable, count=1
reg05: base=0x100000000 (4096MB), size=4096MB: write-back, count=1
reg06: base=0x200000000 (8192MB), size= 512MB: write-back, count=1
reg07: base=0x220000000 (8704MB), size= 128MB: write-back, count=1

AFTER:
$ cat /proc/mtrr
reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1
reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1
reg03: base=0xcf800000 (3320MB), size=   8MB: uncachable, count=1
reg04: base=0xcf700000 (3319MB), size=   1MB: uncachable, count=1
reg05: base=0x100000000 (4096MB), size=4096MB: write-back, count=1
reg06: base=0x200000000 (8192MB), size= 512MB: write-back, count=1
reg07: base=0x220000000 (8704MB), size= 128MB: write-back, count=1

But that still works.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:05     ` Jesse Barnes
  2007-06-06 22:07       ` Justin Piszcz
@ 2007-06-06 22:13       ` Justin Piszcz
  2007-06-06 22:24         ` Jesse Barnes
  1 sibling, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:13 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:03 pm Justin Piszcz wrote:
>> Mem:   8039620k total,  7936472k used,   103148k free,      708k
>> buffers Mem:   8039608k total,   969380k used,  7070228k free,
>> 1232k buffers
>>
>> I am curious, why does the patch != the mem=8832M?
>
> I'm not sure... can you post your e820 map from boot and the contents
> of /proc/mtrr?  Maybe my patch is trimming off a few too many pages, or
> maybe 8832M isn't quite right and actually ends up leaving you with a
> few uncached pages.
>
> Jesse
>

Unless you know of some other way I can capture the output, it only starts 
showing the dmesg from [50..] onward.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:13       ` Justin Piszcz
@ 2007-06-06 22:24         ` Jesse Barnes
  2007-06-06 22:26           ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 22:24 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 3:13 pm Justin Piszcz wrote:
> On Wed, 6 Jun 2007, Jesse Barnes wrote:
> > On Wednesday, June 6, 2007 3:03 pm Justin Piszcz wrote:
> >> Mem:   8039620k total,  7936472k used,   103148k free,      708k
> >> buffers Mem:   8039608k total,   969380k used,  7070228k free,
> >> 1232k buffers
> >>
> >> I am curious, why does the patch != the mem=8832M?
> >
> > I'm not sure... can you post your e820 map from boot and the
> > contents of /proc/mtrr?  Maybe my patch is trimming off a few too
> > many pages, or maybe 8832M isn't quite right and actually ends up
> > leaving you with a few uncached pages.
> >
> > Jesse
>
> Unless you know of some other way I can capture the output, it only
> starts showing the dmesg from [50..] onward.

Did you boot the kernel with the 'debug' option?  Maybe your dmesg 
buffer is too small (there's a config option for that iirc).

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:24         ` Jesse Barnes
@ 2007-06-06 22:26           ` Justin Piszcz
  2007-06-06 22:28             ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:26 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:13 pm Justin Piszcz wrote:
>> On Wed, 6 Jun 2007, Jesse Barnes wrote:
>>> On Wednesday, June 6, 2007 3:03 pm Justin Piszcz wrote:
>>>> Mem:   8039620k total,  7936472k used,   103148k free,      708k
>>>> buffers Mem:   8039608k total,   969380k used,  7070228k free,
>>>> 1232k buffers
>>>>
>>>> I am curious, why does the patch != the mem=8832M?
>>>
>>> I'm not sure... can you post your e820 map from boot and the
>>> contents of /proc/mtrr?  Maybe my patch is trimming off a few too
>>> many pages, or maybe 8832M isn't quite right and actually ends up
>>> leaving you with a few uncached pages.
>>>
>>> Jesse
>>
>> Unless you know of some other way I can capture the output, it only
>> starts showing the dmesg from [50..] onward.
>
> Did you boot the kernel with the 'debug' option?  Maybe your dmesg
> buffer is too small (there's a config option for that iirc).
>
> Jesse
>

Nope, I booted with only netconsole= options.  I have a lot of HW in the 
box and I guess the buffer is too small.  Not sure where to change it in 
the kernel.  Looking..

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:26           ` Justin Piszcz
@ 2007-06-06 22:28             ` Jesse Barnes
  2007-06-06 22:31               ` Justin Piszcz
                                 ` (4 more replies)
  0 siblings, 5 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 22:28 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
> Nope, I booted with only netconsole= options.  I have a lot of HW in
> the box and I guess the buffer is too small.  Not sure where to
> change it in the kernel.  Looking..

It's called "kernel log buffer size" and it's in "General setup".

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:28             ` Jesse Barnes
@ 2007-06-06 22:31               ` Justin Piszcz
  2007-06-06 22:35               ` Justin Piszcz
                                 ` (3 subsequent siblings)
  4 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:31 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>> Nope, I booted with only netconsole= options.  I have a lot of HW in
>> the box and I guess the buffer is too small.  Not sure where to
>> change it in the kernel.  Looking..
>
> It's called "kernel log buffer size" and it's in "General setup".
>
> Jesse
>

I was reviewing some OLD mailing list archives then!

--------------

Per: http://www.edlug.ed.ac.uk/archive/Aug2003/msg00270.html

The kernel has a cyclic buffer of length LOG_BUF_LEN (4096, since 1.3.54:
8192, since 2.1.113: 16384; in
        recent kernels the size can be set at compile time) in which 
messages
given as argument  to  the  kernel
        function printk() are stored (regardless of their loglevel).

So it doesn't look like you can without recompiling...

Bruce

--------------

Getting you the E820 memory map in a few moments.


Changed from 15 -> 16.

   | |    (16) Kernel log buffer size (16 => 64KB, 17 => 128KB)


Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:28             ` Jesse Barnes
  2007-06-06 22:31               ` Justin Piszcz
@ 2007-06-06 22:35               ` Justin Piszcz
  2007-06-06 22:37               ` Randy Dunlap
                                 ` (2 subsequent siblings)
  4 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:35 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

[-- Attachment #1: Type: TEXT/PLAIN, Size: 5340 bytes --]



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>> Nope, I booted with only netconsole= options.  I have a lot of HW in
>> the box and I guess the buffer is too small.  Not sure where to
>> change it in the kernel.  Looking..
>
> It's called "kernel log buffer size" and it's in "General setup".
>
> Jesse
>

Here ya go (and full dmesg attached)

[    0.000000] Linux version 2.6.22-rc4 (root@p34.internal.lan) (gcc
version 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)) #3 SMP Wed Jun 6
18:30:43 EDT 2007
[    0.000000] Command line: auto BOOT_IMAGE=2.6.22-rc4-3 ro root=902
netconsole=4444@192.168.168.253/eth0,514@192.168.168.254/00:50:8D:ED:3C:E7
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000008f000 (usable)
[    0.000000]  BIOS-e820: 000000000008f000 - 00000000000a0000
(reserved)
[    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000
(reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf58f000 (usable)
[    0.000000]  BIOS-e820: 00000000cf58f000 - 00000000cf59c000
(reserved)
[    0.000000]  BIOS-e820: 00000000cf59c000 - 00000000cf653000 (usable)
[    0.000000]  BIOS-e820: 00000000cf653000 - 00000000cf6a5000 (ACPI
NVS)
[    0.000000]  BIOS-e820: 00000000cf6a5000 - 00000000cf6a8000 (ACPI
data)
[    0.000000]  BIOS-e820: 00000000cf6a8000 - 00000000cf6ef000 (ACPI
NVS)
[    0.000000]  BIOS-e820: 00000000cf6ef000 - 00000000cf6f1000 (ACPI
data)
[    0.000000]  BIOS-e820: 00000000cf6f1000 - 00000000cf6f2000 (usable)
[    0.000000]  BIOS-e820: 00000000cf6f2000 - 00000000cf6ff000 (ACPI
data)
[    0.000000]  BIOS-e820: 00000000cf6ff000 - 00000000cf700000 (usable)
[    0.000000]  BIOS-e820: 00000000cf700000 - 00000000d0000000
(reserved)
[    0.000000]  BIOS-e820: 00000000fff00000 - 0000000100000000
(reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000022c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 143) 0 entries of 256
used
[    0.000000] Entering add_active_range(0, 256, 849295) 1 entries of
256 used
[    0.000000] Entering add_active_range(0, 849308, 849491) 2 entries of
256 used
[    0.000000] Entering add_active_range(0, 849649, 849650) 3 entries of
256 used
[    0.000000] Entering add_active_range(0, 849663, 849664) 4 entries of
256 used
[    0.000000] Entering add_active_range(0, 1048576, 2277376) 5 entries
of 256 used
[    0.000000] end_pfn_map = 2277376
[    0.000000] ***************
[    0.000000] **** WARNING: likely BIOS bug
[    0.000000] **** MTRRs don't cover all of memory, trimmed 16384 pages
[    0.000000] ***************
[    0.000000] DMI 2.4 present.
[    0.000000] ACPI: RSDP 000FE020, 0014 (r0 INTEL )
[    0.000000] ACPI: RSDT CF6FD038, 0050 (r1 INTEL  DG965WH       64C
1000013)
[    0.000000] ACPI: FACP CF6FC000, 0074 (r1 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: DSDT CF6F7000, 40E9 (r1 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: FACS CF6A8000, 0040
[    0.000000] ACPI: APIC CF6F6000, 0078 (r1 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: WDDT CF6F5000, 0040 (r1 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: MCFG CF6F4000, 003C (r1 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: ASF! CF6F3000, 00A6 (r32 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: HPET CF6F2000, 0038 (r1 INTEL  DG965WH       64C
MSFT  1000013)
[    0.000000] ACPI: SSDT CF6F0000, 01BC (r1 INTEL     CpuPm      64C
MSFT  1000013)
[    0.000000] ACPI: SSDT CF6EF000, 0175 (r1 INTEL   Cpu0Ist      64C
MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A7000, 0175 (r1 INTEL   Cpu1Ist      64C
MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A6000, 0175 (r1 INTEL   Cpu2Ist      64C
MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A5000, 0175 (r1 INTEL   Cpu3Ist      64C
MSFT  1000013)
[    0.000000] Entering add_active_range(0, 0, 143) 0 entries of 256
used
[    0.000000] Entering add_active_range(0, 256, 849295) 1 entries of
256 used
[    0.000000] Entering add_active_range(0, 849308, 849491) 2 entries of
256 used
[    0.000000] Entering add_active_range(0, 849649, 849650) 3 entries of
256 used
[    0.000000] Entering add_active_range(0, 849663, 849664) 4 entries of
256 used
[    0.000000] Entering add_active_range(0, 1048576, 2260992) 5 entries
of 256 used
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA             0 ->     4096
[    0.000000]   DMA32        4096 ->  1048576
[    0.000000]   Normal    1048576 ->  2260992
[    0.000000] early_node_map[6] active PFN ranges
[    0.000000]     0:        0 ->      143
[    0.000000]     0:      256 ->   849295
[    0.000000]     0:   849308 ->   849491
[    0.000000]     0:   849649 ->   849650
[    0.000000]     0:   849663 ->   849664
[    0.000000]     0:  1048576 ->  2260992
[    0.000000] On node 0 totalpages: 2061783
[    0.000000]   DMA zone: 56 pages used for memmap
[    0.000000]   DMA zone: 1395 pages reserved
[    0.000000]   DMA zone: 2532 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 14280 pages used for memmap
[    0.000000]   DMA32 zone: 831104 pages, LIFO batch:31
[    0.000000]   Normal zone: 16576 pages used for memmap
[    0.000000]   Normal zone: 1195840 pages, LIFO batch:31
[    0.000000] ACPI: PM-Timer IO Port: 0x408
[    0.000000] ACPI: Local APIC address 0xfee00000

[-- Attachment #2: Type: TEXT/plain, Size: 49579 bytes --]

[    0.000000] Linux version 2.6.22-rc4 (root@p34.internal.lan) (gcc version 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)) #3 SMP Wed Jun 6 18:30:43 EDT 2007
[    0.000000] Command line: auto BOOT_IMAGE=2.6.22-rc4-3 ro root=902 netconsole=4444@192.168.168.253/eth0,514@192.168.168.254/00:50:8D:ED:3C:E7
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000008f000 (usable)
[    0.000000]  BIOS-e820: 000000000008f000 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf58f000 (usable)
[    0.000000]  BIOS-e820: 00000000cf58f000 - 00000000cf59c000 (reserved)
[    0.000000]  BIOS-e820: 00000000cf59c000 - 00000000cf653000 (usable)
[    0.000000]  BIOS-e820: 00000000cf653000 - 00000000cf6a5000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf6a5000 - 00000000cf6a8000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf6a8000 - 00000000cf6ef000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf6ef000 - 00000000cf6f1000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf6f1000 - 00000000cf6f2000 (usable)
[    0.000000]  BIOS-e820: 00000000cf6f2000 - 00000000cf6ff000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf6ff000 - 00000000cf700000 (usable)
[    0.000000]  BIOS-e820: 00000000cf700000 - 00000000d0000000 (reserved)
[    0.000000]  BIOS-e820: 00000000fff00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000022c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 143) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849295) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 849308, 849491) 2 entries of 256 used
[    0.000000] Entering add_active_range(0, 849649, 849650) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 849663, 849664) 4 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 2277376) 5 entries of 256 used
[    0.000000] end_pfn_map = 2277376
[    0.000000] ***************
[    0.000000] **** WARNING: likely BIOS bug
[    0.000000] **** MTRRs don't cover all of memory, trimmed 16384 pages
[    0.000000] ***************
[    0.000000] DMI 2.4 present.
[    0.000000] ACPI: RSDP 000FE020, 0014 (r0 INTEL )
[    0.000000] ACPI: RSDT CF6FD038, 0050 (r1 INTEL  DG965WH       64C       1000013)
[    0.000000] ACPI: FACP CF6FC000, 0074 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: DSDT CF6F7000, 40E9 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: FACS CF6A8000, 0040
[    0.000000] ACPI: APIC CF6F6000, 0078 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: WDDT CF6F5000, 0040 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: MCFG CF6F4000, 003C (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: ASF! CF6F3000, 00A6 (r32 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: HPET CF6F2000, 0038 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6F0000, 01BC (r1 INTEL     CpuPm      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6EF000, 0175 (r1 INTEL   Cpu0Ist      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A7000, 0175 (r1 INTEL   Cpu1Ist      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A6000, 0175 (r1 INTEL   Cpu2Ist      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A5000, 0175 (r1 INTEL   Cpu3Ist      64C MSFT  1000013)
[    0.000000] Entering add_active_range(0, 0, 143) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849295) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 849308, 849491) 2 entries of 256 used
[    0.000000] Entering add_active_range(0, 849649, 849650) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 849663, 849664) 4 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 2260992) 5 entries of 256 used
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA             0 ->     4096
[    0.000000]   DMA32        4096 ->  1048576
[    0.000000]   Normal    1048576 ->  2260992
[    0.000000] early_node_map[6] active PFN ranges
[    0.000000]     0:        0 ->      143
[    0.000000]     0:      256 ->   849295
[    0.000000]     0:   849308 ->   849491
[    0.000000]     0:   849649 ->   849650
[    0.000000]     0:   849663 ->   849664
[    0.000000]     0:  1048576 ->  2260992
[    0.000000] On node 0 totalpages: 2061783
[    0.000000]   DMA zone: 56 pages used for memmap
[    0.000000]   DMA zone: 1395 pages reserved
[    0.000000]   DMA zone: 2532 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 14280 pages used for memmap
[    0.000000]   DMA32 zone: 831104 pages, LIFO batch:31
[    0.000000]   Normal zone: 16576 pages used for memmap
[    0.000000]   Normal zone: 1195840 pages, LIFO batch:31
[    0.000000] ACPI: PM-Timer IO Port: 0x408
[    0.000000] ACPI: Local APIC address 0xfee00000
[    0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
[    0.000000] Processor #0 (Bootup-CPU)
[    0.000000] ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
[    0.000000] Processor #2
[    0.000000] ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
[    0.000000] Processor #1
[    0.000000] ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
[    0.000000] Processor #3
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0x01] dfl dfl lint[0x1])
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0x02] dfl dfl lint[0x1])
[    0.000000] ACPI: IOAPIC (id[0x02] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 2, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.000000] ACPI: IRQ0 used by override.
[    0.000000] ACPI: IRQ2 used by override.
[    0.000000] ACPI: IRQ9 used by override.
[    0.000000] Setting APIC routing to flat
[    0.000000] ACPI: HPET id: 0x8086a201 base: 0xfed00000
[    0.000000] Using ACPI (MADT) for SMP configuration information
[    0.000000] Allocating PCI resources starting at d4000000 (gap: d0000000:2ff00000)
[    0.000000] PERCPU: Allocating 24744 bytes of per cpu data
[    0.000000] Built 1 zonelists.  Total pages: 2029476
[    0.000000] Kernel command line: auto BOOT_IMAGE=2.6.22-rc4-3 ro root=902 netconsole=4444@192.168.168.253/eth0,514@192.168.168.254/00:50:8D:ED:3C:E7
[    0.000000] netconsole: local port 4444
[    0.000000] netconsole: local IP 192.168.168.253
[    0.000000] netconsole: interface eth0
[    0.000000] netconsole: remote port 514
[    0.000000] netconsole: remote IP 192.168.168.254
[    0.000000] netconsole: remote ethernet address 00:50:8d:ed:3c:e7
[    0.000000] Initializing CPU#0
[    0.000000] PID hash table entries: 4096 (order: 12, 32768 bytes)
[   48.079506] time.c: Detected 2397.607 MHz processor.
[   48.080578] Console: colour VGA+ 80x25
[   48.088448] Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes)
[   48.094647] Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)
[   48.095721] Checking aperture...
[   48.095768] PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
[   48.132061] Placing software IO TLB between 0x951a000 - 0xd51a000
[   48.205318] Memory: 8039012k/9043968k available (3549k kernel code, 207808k reserved, 1317k data, 216k init)
[   48.264754] Calibrating delay using timer specific routine.. 4797.72 BogoMIPS (lpj=2398863)
[   48.264878] Mount-cache hash table entries: 256
[   48.265005] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.265068] CPU: L2 cache: 4096K
[   48.265108] using mwait in idle threads.
[   48.265150] CPU: Physical Processor ID: 0
[   48.265191] CPU: Processor Core ID: 0
[   48.265237] CPU0: Thermal monitoring enabled (TM2)
[   48.265284] Freeing SMP alternatives: 33k freed
[   48.265350] ACPI: Core revision 20070126
[   48.278788] Using local APIC timer interrupts.
[   48.320532] result 16650035
[   48.320571] Detected 16.650 MHz APIC timer.
[   48.320815] Booting processor 1/4 APIC 0x2
[   48.331205] Initializing CPU#1
[   48.391721] Calibrating delay using timer specific routine.. 4795.28 BogoMIPS (lpj=2397640)
[   48.391727] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.391728] CPU: L2 cache: 4096K
[   48.391730] CPU: Physical Processor ID: 0
[   48.391731] CPU: Processor Core ID: 2
[   48.391736] CPU1: Thermal monitoring enabled (TM2)
[   48.392159] Intel(R) Core(TM)2 Quad CPU           @ 2.40GHz stepping 07
[   48.392222] checking TSC synchronization [CPU#0 -> CPU#1]: passed.
[   48.412781] Booting processor 2/4 APIC 0x1
[   48.423528] Initializing CPU#2
[   48.483700] Calibrating delay using timer specific routine.. 4795.23 BogoMIPS (lpj=2397616)
[   48.483706] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.483708] CPU: L2 cache: 4096K
[   48.483710] CPU: Physical Processor ID: 0
[   48.483711] CPU: Processor Core ID: 1
[   48.483716] CPU2: Thermal monitoring enabled (TM2)
[   48.484157] Intel(R) Core(TM)2 Quad CPU           @ 2.40GHz stepping 07
[   48.484186] checking TSC synchronization [CPU#0 -> CPU#2]: passed.
[   48.504779] Booting processor 3/4 APIC 0x3
[   48.515172] Initializing CPU#3
[   48.575678] Calibrating delay using timer specific routine.. 4795.27 BogoMIPS (lpj=2397638)
[   48.575684] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.575686] CPU: L2 cache: 4096K
[   48.575688] CPU: Physical Processor ID: 0
[   48.575689] CPU: Processor Core ID: 3
[   48.575694] CPU3: Thermal monitoring enabled (TM2)
[   48.576114] Intel(R) Core(TM)2 Quad CPU           @ 2.40GHz stepping 07
[   48.576188] checking TSC synchronization [CPU#0 -> CPU#3]: passed.
[   48.596679] Brought up 4 CPUs
[   49.216748] migration_cost=17,3461
[   49.217116] NET: Registered protocol family 16
[   49.217217] ACPI: bus type pci registered
[   49.217266] PCI: BIOS Bug: MCFG area at f0000000 is not E820-reserved
[   49.217316] PCI: Not using MMCONFIG.
[   49.217368] PCI: Using configuration type 1
[   49.220054] ACPI: Interpreter enabled
[   49.220096] ACPI: Using IOAPIC for interrupt routing
[   49.223600] ACPI: PCI Root Bridge [PCI0] (0000:00)
[   49.223659] PCI: Probing PCI hardware (bus 00)
[   49.224313] PCI quirk: region 0400-047f claimed by ICH6 ACPI/GPIO/TCO
[   49.224361] PCI quirk: region 0500-053f claimed by ICH6 GPIO
[   49.225260] PCI: Transparent bridge - 0000:00:1e.0
[   49.225372] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
[   49.225650] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.P32_._PRT]
[   49.225851] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX0._PRT]
[   49.225931] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX1._PRT]
[   49.226010] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX2._PRT]
[   49.226090] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX3._PRT]
[   49.226169] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX4._PRT]
[   49.229348] ACPI: PCI Interrupt Link [LNKA] (IRQs 3 4 5 7 9 10 *11 12)
[   49.229638] ACPI: PCI Interrupt Link [LNKB] (IRQs 3 4 5 7 9 *10 11 12)
[   49.229924] ACPI: PCI Interrupt Link [LNKC] (IRQs 3 4 5 7 9 10 *11 12)
[   49.230250] ACPI: PCI Interrupt Link [LNKD] (IRQs 3 4 5 7 9 10 *11 12)
[   49.230548] ACPI: PCI Interrupt Link [LNKE] (IRQs 3 4 5 7 *9 10 11 12)
[   49.230834] ACPI: PCI Interrupt Link [LNKF] (IRQs 3 4 5 7 9 *10 11 12)
[   49.231141] ACPI: PCI Interrupt Link [LNKG] (IRQs 3 4 5 7 *9 10 11 12)
[   49.231448] ACPI: PCI Interrupt Link [LNKH] (IRQs 3 4 5 7 9 10 *11 12)
[   49.231825] Linux Plug and Play Support v0.97 (c) Adam Belay
[   49.231875] pnp: PnP ACPI init
[   49.231921] ACPI: bus type pnp registered
[   49.234206] pnp: PnP ACPI: found 12 devices
[   49.234261] ACPI: ACPI bus type pnp unregistered
[   49.234441] SCSI subsystem initialized
[   49.234541] libata version 2.21 loaded.
[   49.234634] usbcore: registered new interface driver usbfs
[   49.234730] usbcore: registered new interface driver hub
[   49.234848] usbcore: registered new device driver usb
[   49.234931] PCI: Using ACPI for IRQ routing
[   49.234973] PCI: If a device doesn't work, try "pci=routeirq".  If it helps, post a report
[   49.235126] PCI-GART: No AMD northbridge found.
[   49.235172] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
[   49.235286] hpet0: 3 64-bit timers, 14318180 Hz
[   49.236430] pnp: 00:01: iomem range 0xf0000000-0xf7ffffff has been reserved
[   49.236491] pnp: 00:01: iomem range 0xfed13000-0xfed13fff has been reserved
[   49.236541] Time: tsc clocksource has been installed.
[   49.236597] pnp: 00:01: iomem range 0xfed14000-0xfed17fff has been reserved
[   49.236655] pnp: 00:01: iomem range 0xfed18000-0xfed18fff has been reserved
[   49.236706] pnp: 00:06: ioport range 0x500-0x53f has been reserved
[   49.236773] pnp: 00:06: ioport range 0x400-0x47f has been reserved
[   49.237372] pnp: 00:06: ioport range 0x680-0x6ff has been reserved
[   49.237848] ACPI: PCI Interrupt 0000:06:03.0[A] -> GSI 19 (level, low) -> IRQ 19
[   49.288159] ohci1394: fw-host0: OHCI-1394 1.1 (PCI): IRQ=[19]  MMIO=[e0084000-e00847ff]  Max Packet=[2048]  IR/IT contexts=[4/8]
[   49.288289] PCI: Bridge: 0000:00:1c.0
[   49.288330]   IO window: disabled.
[   49.288373]   MEM window: e0700000-e07fffff
[   49.288416]   PREFETCH window: disabled.
[   49.288460] PCI: Bridge: 0000:00:1c.1
[   49.288501]   IO window: 5000-5fff
[   49.288545]   MEM window: e0400000-e04fffff
[   49.288599]   PREFETCH window: disabled.
[   49.288652] PCI: Bridge: 0000:00:1c.2
[   49.288694]   IO window: 4000-4fff
[   49.288736]   MEM window: e0300000-e03fffff
[   49.288800]   PREFETCH window: e0800000-e08fffff
[   49.288846] PCI: Bridge: 0000:00:1c.3
[   49.288887]   IO window: 3000-3fff
[   49.288930]   MEM window: e0200000-e02fffff
[   49.288973]   PREFETCH window: e0900000-e09fffff
[   49.289018] PCI: Bridge: 0000:00:1c.4
[   49.289060]   IO window: 2000-2fff
[   49.289102]   MEM window: e0100000-e01fffff
[   49.289146]   PREFETCH window: e0a00000-e0afffff
[   49.289192] PCI: Bridge: 0000:00:1e.0
[   49.289233]   IO window: 1000-1fff
[   49.289276]   MEM window: e0000000-e00fffff
[   49.289319]   PREFETCH window: e0b00000-e0bfffff
[   49.289376] ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 17 (level, low) -> IRQ 17
[   49.289461] PCI: Setting latency timer of device 0000:00:1c.0 to 64
[   49.289474] ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 16 (level, low) -> IRQ 16
[   49.289559] PCI: Setting latency timer of device 0000:00:1c.1 to 64
[   49.289573] ACPI: PCI Interrupt 0000:00:1c.2[C] -> GSI 18 (level, low) -> IRQ 18
[   49.289677] PCI: Setting latency timer of device 0000:00:1c.2 to 64
[   49.289690] ACPI: PCI Interrupt 0000:00:1c.3[D] -> GSI 19 (level, low) -> IRQ 19
[   49.289773] PCI: Setting latency timer of device 0000:00:1c.3 to 64
[   49.289785] ACPI: PCI Interrupt 0000:00:1c.4[A] -> GSI 17 (level, low) -> IRQ 17
[   49.289889] PCI: Setting latency timer of device 0000:00:1c.4 to 64
[   49.289897] PCI: Setting latency timer of device 0000:00:1e.0 to 64
[   49.289944] NET: Registered protocol family 2
[   49.300806] IP route cache hash table entries: 262144 (order: 9, 2097152 bytes)
[   49.301356] TCP established hash table entries: 131072 (order: 9, 3145728 bytes)
[   49.302625] TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
[   49.303110] TCP: Hash tables configured (established 131072 bind 65536)
[   49.303158] TCP reno registered
[   49.307229] Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
[   49.307421] SGI XFS with large block/inode numbers, no debug enabled
[   49.307761] io scheduler noop registered
[   49.307813] io scheduler anticipatory registered (default)
[   49.307863] Boot video device is 0000:00:02.0
[   49.308278] PCI: Setting latency timer of device 0000:00:1c.0 to 64
[   49.308309] assign_interrupt_mode Found MSI capability
[   49.308380] Allocate Port Service[0000:00:1c.0:pcie00]
[   49.308402] Allocate Port Service[0000:00:1c.0:pcie02]
[   49.308460] PCI: Setting latency timer of device 0000:00:1c.1 to 64
[   49.308491] assign_interrupt_mode Found MSI capability
[   49.308569] Allocate Port Service[0000:00:1c.1:pcie00]
[   49.308602] Allocate Port Service[0000:00:1c.1:pcie02]
[   49.308658] PCI: Setting latency timer of device 0000:00:1c.2 to 64
[   49.308689] assign_interrupt_mode Found MSI capability
[   49.308757] Allocate Port Service[0000:00:1c.2:pcie00]
[   49.308777] Allocate Port Service[0000:00:1c.2:pcie02]
[   49.308835] PCI: Setting latency timer of device 0000:00:1c.3 to 64
[   49.308866] assign_interrupt_mode Found MSI capability
[   49.308933] Allocate Port Service[0000:00:1c.3:pcie00]
[   49.308956] Allocate Port Service[0000:00:1c.3:pcie02]
[   49.309015] PCI: Setting latency timer of device 0000:00:1c.4 to 64
[   49.309045] assign_interrupt_mode Found MSI capability
[   49.309134] Allocate Port Service[0000:00:1c.4:pcie00]
[   49.309156] Allocate Port Service[0000:00:1c.4:pcie02]
[   49.310808] lp: driver loaded but no devices found
[   49.310888] Real Time Clock Driver v1.12ac
[   49.311009] hpet_resources: 0xfed00000 is busy
[   49.311027] Linux agpgart interface v0.102 (c) Dave Jones
[   49.311105] agpgart: Detected an Intel 965G Chipset.
[   49.312363] agpgart: Detected 7676K stolen memory.
[   49.324807] agpgart: AGP aperture is 256M @ 0xd0000000
[   49.324885] [drm] Initialized drm 1.1.0 20060810
[   49.324944] ACPI: PCI Interrupt 0000:00:02.0[A] -> GSI 16 (level, low) -> IRQ 16
[   49.325066] [drm] Initialized i915 1.6.0 20060119 on minor 0
[   49.325167] parport_pc 00:07: reported by Plug and Play ACPI
[   49.325285] parport0: PC-style at 0x378 (0x778), irq 7, using FIFO [PCSPP,TRISTATE,COMPAT,ECP]
[   49.408131] lp0: using parport0 (interrupt-driven).
[   49.408298] loop: module loaded
[   49.408338] Intel(R) PRO/1000 Network Driver - version 7.3.20-k2-NAPI
[   49.408385] Copyright (c) 1999-2006 Intel Corporation.
[   49.408457] ACPI: PCI Interrupt 0000:00:19.0[A] -> GSI 20 (level, low) -> IRQ 20
[   49.408565] PCI: Setting latency timer of device 0000:00:19.0 to 64
[   49.427147] e1000: 0000:00:19.0: e1000_probe: (PCI Express:2.5Gb/s:Width x1) 00:19:d1:6e:9d:43
[   49.502455] e1000: eth0: e1000_probe: Intel(R) PRO/1000 Network Connection
[   49.502543] ACPI: PCI Interrupt 0000:06:00.0[A] -> GSI 21 (level, low) -> IRQ 21
[   49.764949] e1000: 0000:06:00.0: e1000_probe: (PCI:33MHz:32-bit) 00:0e:0c:00:cd:b1
[   49.911684] e1000: eth1: e1000_probe: Intel(R) PRO/1000 Network Connection
[   49.911744] ACPI: PCI Interrupt 0000:06:01.0[A] -> GSI 22 (level, low) -> IRQ 22
[   50.158693] e1000: 0000:06:01.0: e1000_probe: (PCI:33MHz:32-bit) 00:07:e9:29:37:db
[   50.184398] e1000: eth2: e1000_probe: Intel(R) PRO/1000 Network Connection
[   50.184493] netconsole: device eth0 not up yet, forcing it
[   50.544713] ieee1394: Host added: ID:BUS[0-00:1023]  GUID[0090270001c5be9f]
[   52.444887] e1000: eth0: e1000_watchdog: NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX
[   52.468712] netconsole: network logging started
[   52.468869] ahci 0000:00:1f.2: version 2.2
[   52.468883] ACPI: PCI Interrupt 0000:00:1f.2[A] -> GSI 19 (level, low) -> IRQ 19
[   53.470122] ahci 0000:00:1f.2: AHCI 0001.0100 32 slots 6 ports 3 Gbps 0x3f impl SATA mode
[   53.470195] ahci 0000:00:1f.2: flags: 64bit ncq led clo pio slum part 
[   53.470269] PCI: Setting latency timer of device 0000:00:1f.2 to 64
[   53.470484] scsi0 : ahci
[   53.470599] scsi1 : ahci
[   53.470694] scsi2 : ahci
[   53.470792] scsi3 : ahci
[   53.470892] scsi4 : ahci
[   53.471005] scsi5 : ahci
[   53.471098] ata1: SATA max UDMA/133 cmd 0xffffc2000002e100 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.471178] ata2: SATA max UDMA/133 cmd 0xffffc2000002e180 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.471256] ata3: SATA max UDMA/133 cmd 0xffffc2000002e200 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.471334] ata4: SATA max UDMA/133 cmd 0xffffc2000002e280 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.471412] ata5: SATA max UDMA/133 cmd 0xffffc2000002e300 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.471498] ata6: SATA max UDMA/133 cmd 0xffffc2000002e380 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.981909] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   53.985309] ata1.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   53.985379] ata1.00: ATA-6: WDC WD740GD-00FLC0, 33.08F33, max UDMA/133
[   53.985436] ata1.00: 145226112 sectors, multi 0: LBA48 
[   53.989871] ata1.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   53.989943] ata1.00: configured for UDMA/133
[   54.448246] ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   54.451681] ata2.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   54.451751] ata2.00: ATA-6: WDC WD740GD-00FLC0, 33.08F33, max UDMA/133
[   54.451806] ata2.00: 145226112 sectors, multi 0: LBA48 
[   54.456220] ata2.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   54.456291] ata2.00: configured for UDMA/133
[   54.915523] ata3: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   54.917712] ata3.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   54.917783] ata3.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   54.917856] ata3.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   54.920699] ata3.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   54.920771] ata3.00: configured for UDMA/133
[   55.378761] ata4: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   55.380949] ata4.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.381029] ata4.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   55.381100] ata4.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   55.383936] ata4.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.384008] ata4.00: configured for UDMA/133
[   55.843019] ata5: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   55.845093] ata5.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.845164] ata5.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   55.845230] ata5.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   55.847887] ata5.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.847960] ata5.00: configured for UDMA/133
[   56.307338] ata6: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   56.309375] ata6.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.309446] ata6.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   56.309502] ata6.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   56.312166] ata6.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.312238] ata6.00: configured for UDMA/133
[   56.312366] scsi 0:0:0:0: Direct-Access     ATA      WDC WD740GD-00FL 33.0 PQ: 0 ANSI: 5
[   56.312519] sd 0:0:0:0: [sda] 145226112 512-byte hardware sectors (74356 MB)
[   56.312587] sd 0:0:0:0: [sda] Write Protect is off
[   56.312643] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[   56.312654] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.312764] sd 0:0:0:0: [sda] 145226112 512-byte hardware sectors (74356 MB)
[   56.312830] sd 0:0:0:0: [sda] Write Protect is off
[   56.312888] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[   56.312899] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.312989]  sda: sda1 sda2 sda3
[   56.322761] sd 0:0:0:0: [sda] Attached SCSI disk
[   56.322978] scsi 1:0:0:0: Direct-Access     ATA      WDC WD740GD-00FL 33.0 PQ: 0 ANSI: 5
[   56.323159] sd 1:0:0:0: [sdb] 145226112 512-byte hardware sectors (74356 MB)
[   56.323221] sd 1:0:0:0: [sdb] Write Protect is off
[   56.323274] sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
[   56.323289] sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.323398] sd 1:0:0:0: [sdb] 145226112 512-byte hardware sectors (74356 MB)
[   56.323462] sd 1:0:0:0: [sdb] Write Protect is off
[   56.323513] sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
[   56.323528] sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.323632]  sdb: sdb1 sdb2 sdb3
[   56.330482] sd 1:0:0:0: [sdb] Attached SCSI disk
[   56.330653] scsi 2:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.330826] sd 2:0:0:0: [sdc] 293046768 512-byte hardware sectors (150040 MB)
[   56.330886] sd 2:0:0:0: [sdc] Write Protect is off
[   56.330942] sd 2:0:0:0: [sdc] Mode Sense: 00 3a 00 00
[   56.330958] sd 2:0:0:0: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.331061] sd 2:0:0:0: [sdc] 293046768 512-byte hardware sectors (150040 MB)
[   56.331128] sd 2:0:0:0: [sdc] Write Protect is off
[   56.331176] sd 2:0:0:0: [sdc] Mode Sense: 00 3a 00 00
[   56.331191] sd 2:0:0:0: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.331295]  sdc: sdc1
[   56.335795] sd 2:0:0:0: [sdc] Attached SCSI disk
[   56.335938] scsi 3:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.336132] sd 3:0:0:0: [sdd] 293046768 512-byte hardware sectors (150040 MB)
[   56.336206] sd 3:0:0:0: [sdd] Write Protect is off
[   56.336253] sd 3:0:0:0: [sdd] Mode Sense: 00 3a 00 00
[   56.336263] sd 3:0:0:0: [sdd] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.336374] sd 3:0:0:0: [sdd] 293046768 512-byte hardware sectors (150040 MB)
[   56.336431] sd 3:0:0:0: [sdd] Write Protect is off
[   56.336478] sd 3:0:0:0: [sdd] Mode Sense: 00 3a 00 00
[   56.336488] sd 3:0:0:0: [sdd] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.336580]  sdd: sdd1
[   56.343788] sd 3:0:0:0: [sdd] Attached SCSI disk
[   56.343935] scsi 4:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.344106] sd 4:0:0:0: [sde] 293046768 512-byte hardware sectors (150040 MB)
[   56.344166] sd 4:0:0:0: [sde] Write Protect is off
[   56.344220] sd 4:0:0:0: [sde] Mode Sense: 00 3a 00 00
[   56.344235] sd 4:0:0:0: [sde] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.344339] sd 4:0:0:0: [sde] 293046768 512-byte hardware sectors (150040 MB)
[   56.344403] sd 4:0:0:0: [sde] Write Protect is off
[   56.344454] sd 4:0:0:0: [sde] Mode Sense: 00 3a 00 00
[   56.344469] sd 4:0:0:0: [sde] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.344573]  sde: sde1
[   56.355511] sd 4:0:0:0: [sde] Attached SCSI disk
[   56.355657] scsi 5:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.355824] sd 5:0:0:0: [sdf] 293046768 512-byte hardware sectors (150040 MB)
[   56.355884] sd 5:0:0:0: [sdf] Write Protect is off
[   56.355937] sd 5:0:0:0: [sdf] Mode Sense: 00 3a 00 00
[   56.355952] sd 5:0:0:0: [sdf] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.356059] sd 5:0:0:0: [sdf] 293046768 512-byte hardware sectors (150040 MB)
[   56.356126] sd 5:0:0:0: [sdf] Write Protect is off
[   56.356174] sd 5:0:0:0: [sdf] Mode Sense: 00 3a 00 00
[   56.356189] sd 5:0:0:0: [sdf] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.356293]  sdf: sdf1
[   56.361893] sd 5:0:0:0: [sdf] Attached SCSI disk
[   56.362122] sata_sil24 0000:03:00.0: version 0.9
[   56.362153] ACPI: PCI Interrupt 0000:03:00.0[A] -> GSI 18 (level, low) -> IRQ 18
[   56.362327] PCI: Setting latency timer of device 0000:03:00.0 to 64
[   56.362382] scsi6 : sata_sil24
[   56.362501] scsi7 : sata_sil24
[   56.362616] ata7: SATA max UDMA/100 cmd 0xffffc20000038000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   56.362703] ata8: SATA max UDMA/100 cmd 0xffffc2000003a000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   56.766751] ata7: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   56.769241] ata7.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.769313] ata7.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   56.769369] ata7.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   56.772597] ata7.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.772670] ata7.00: configured for UDMA/100
[   57.176868] ata8: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   57.179393] ata8.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.179460] ata8.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   57.179511] ata8.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   57.182775] ata8.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.182847] ata8.00: configured for UDMA/100
[   57.182955] scsi 6:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   57.183126] sd 6:0:0:0: [sdg] 293046768 512-byte hardware sectors (150040 MB)
[   57.183191] sd 6:0:0:0: [sdg] Write Protect is off
[   57.183238] sd 6:0:0:0: [sdg] Mode Sense: 00 3a 00 00
[   57.183248] sd 6:0:0:0: [sdg] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.183361] sd 6:0:0:0: [sdg] 293046768 512-byte hardware sectors (150040 MB)
[   57.183418] sd 6:0:0:0: [sdg] Write Protect is off
[   57.183485] sd 6:0:0:0: [sdg] Mode Sense: 00 3a 00 00
[   57.183495] sd 6:0:0:0: [sdg] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.183566]  sdg: sdg1
[   57.194888] sd 6:0:0:0: [sdg] Attached SCSI disk
[   57.195063] scsi 7:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   57.195213] sd 7:0:0:0: [sdh] 293046768 512-byte hardware sectors (150040 MB)
[   57.195275] sd 7:0:0:0: [sdh] Write Protect is off
[   57.195327] sd 7:0:0:0: [sdh] Mode Sense: 00 3a 00 00
[   57.195342] sd 7:0:0:0: [sdh] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.195470] sd 7:0:0:0: [sdh] 293046768 512-byte hardware sectors (150040 MB)
[   57.195526] sd 7:0:0:0: [sdh] Write Protect is off
[   57.195593] sd 7:0:0:0: [sdh] Mode Sense: 00 3a 00 00
[   57.195603] sd 7:0:0:0: [sdh] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.195675]  sdh: sdh1
[   57.206857] sd 7:0:0:0: [sdh] Attached SCSI disk
[   57.206966] ACPI: PCI Interrupt 0000:04:00.0[A] -> GSI 19 (level, low) -> IRQ 19
[   57.207106] PCI: Setting latency timer of device 0000:04:00.0 to 64
[   57.207188] scsi8 : sata_sil24
[   57.207305] scsi9 : sata_sil24
[   57.207422] ata9: SATA max UDMA/100 cmd 0xffffc20000068000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   57.207507] ata10: SATA max UDMA/100 cmd 0xffffc2000006a000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   57.612628] ata9: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   57.614742] ata9.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.614823] ata9.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   57.614874] ata9.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   57.617628] ata9.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.617700] ata9.00: configured for UDMA/100
[   58.022845] ata10: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   58.024964] ata10.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.025035] ata10.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   58.025100] ata10.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   58.027875] ata10.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.027949] ata10.00: configured for UDMA/100
[   58.028056] scsi 8:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   58.028242] sd 8:0:0:0: [sdi] 293046768 512-byte hardware sectors (150040 MB)
[   58.028302] sd 8:0:0:0: [sdi] Write Protect is off
[   58.028354] sd 8:0:0:0: [sdi] Mode Sense: 00 3a 00 00
[   58.028370] sd 8:0:0:0: [sdi] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.028474] sd 8:0:0:0: [sdi] 293046768 512-byte hardware sectors (150040 MB)
[   58.028540] sd 8:0:0:0: [sdi] Write Protect is off
[   58.028591] sd 8:0:0:0: [sdi] Mode Sense: 00 3a 00 00
[   58.028606] sd 8:0:0:0: [sdi] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.028710]  sdi: sdi1
[   58.038904] sd 8:0:0:0: [sdi] Attached SCSI disk
[   58.039042] scsi 9:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   58.039193] sd 9:0:0:0: [sdj] 293046768 512-byte hardware sectors (150040 MB)
[   58.039253] sd 9:0:0:0: [sdj] Write Protect is off
[   58.039305] sd 9:0:0:0: [sdj] Mode Sense: 00 3a 00 00
[   58.039320] sd 9:0:0:0: [sdj] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.039454] sd 9:0:0:0: [sdj] 293046768 512-byte hardware sectors (150040 MB)
[   58.039519] sd 9:0:0:0: [sdj] Write Protect is off
[   58.039566] sd 9:0:0:0: [sdj] Mode Sense: 00 3a 00 00
[   58.039576] sd 9:0:0:0: [sdj] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.039668]  sdj: sdj1
[   58.047213] sd 9:0:0:0: [sdj] Attached SCSI disk
[   58.047352] ACPI: PCI Interrupt 0000:05:00.0[A] -> GSI 16 (level, low) -> IRQ 16
[   58.047504] PCI: Setting latency timer of device 0000:05:00.0 to 64
[   58.047552] scsi10 : sata_sil24
[   58.047668] scsi11 : sata_sil24
[   58.047787] ata11: SATA max UDMA/100 cmd 0xffffc20000070000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   58.047872] ata12: SATA max UDMA/100 cmd 0xffffc20000072000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   58.452485] ata11: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   58.454979] ata11.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.455050] ata11.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   58.455135] ata11.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   58.458386] ata11.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.458459] ata11.00: configured for UDMA/100
[   58.862662] ata12: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   58.865167] ata12.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.865238] ata12.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   58.865294] ata12.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   58.868544] ata12.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.868617] ata12.00: configured for UDMA/100
[   58.868723] scsi 10:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   58.868881] sd 10:0:0:0: [sdk] 293046768 512-byte hardware sectors (150040 MB)
[   58.868983] sd 10:0:0:0: [sdk] Write Protect is off
[   58.869031] sd 10:0:0:0: [sdk] Mode Sense: 00 3a 00 00
[   58.869041] sd 10:0:0:0: [sdk] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.869152] sd 10:0:0:0: [sdk] 293046768 512-byte hardware sectors (150040 MB)
[   58.869237] sd 10:0:0:0: [sdk] Write Protect is off
[   58.869292] sd 10:0:0:0: [sdk] Mode Sense: 00 3a 00 00
[   58.869302] sd 10:0:0:0: [sdk] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.869374]  sdk: sdk1
[   58.877168] sd 10:0:0:0: [sdk] Attached SCSI disk
[   58.877349] scsi 11:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   58.877517] sd 11:0:0:0: [sdl] 293046768 512-byte hardware sectors (150040 MB)
[   58.877593] sd 11:0:0:0: [sdl] Write Protect is off
[   58.877645] sd 11:0:0:0: [sdl] Mode Sense: 00 3a 00 00
[   58.877661] sd 11:0:0:0: [sdl] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.877767] sd 11:0:0:0: [sdl] 293046768 512-byte hardware sectors (150040 MB)
[   58.877847] sd 11:0:0:0: [sdl] Write Protect is off
[   58.877898] sd 11:0:0:0: [sdl] Mode Sense: 00 3a 00 00
[   58.877913] sd 11:0:0:0: [sdl] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.878019]  sdl: sdl1
[   58.885496] sd 11:0:0:0: [sdl] Attached SCSI disk
[   58.885641] ACPI: PCI Interrupt 0000:02:00.0[A] -> GSI 17 (level, low) -> IRQ 17
[   58.885756] PCI: Setting latency timer of device 0000:02:00.0 to 64
[   58.885790] scsi12 : pata_marvell
[   58.885892] scsi13 : pata_marvell
[   58.886030] ata13: PATA max UDMA/100 cmd 0x0000000000015018 ctl 0x0000000000015026 bmdma 0x0000000000015000 irq 0
[   58.886113] ata14: DUMMY
[   58.886182] BAR5:00:00 01:7F 02:22 03:CA 04:00 05:00 06:00 07:00 08:00 09:00 0A:00 0B:00 0C:01 0D:00 0E:00 0F:00 
[   59.192728] ata13.00: ATAPI, max UDMA/33
[   59.349867] ata13.00: configured for UDMA/33
[   59.350932] scsi 12:0:0:0: CD-ROM            _NEC     DVD_RW ND-3520A  1.04 PQ: 0 ANSI: 5
[   59.355097] sr0: scsi3-mmc drive: 48x/48x writer cd/rw xa/form2 cdda tray
[   59.355179] Uniform CD-ROM driver Revision: 3.20
[   59.355295] sr 12:0:0:0: Attached scsi CD-ROM sr0
[   59.355382] ACPI: PCI Interrupt 0000:00:1a.7[C] -> GSI 18 (level, low) -> IRQ 18
[   59.355485] PCI: Setting latency timer of device 0000:00:1a.7 to 64
[   59.355489] ehci_hcd 0000:00:1a.7: EHCI Host Controller
[   59.355585] ehci_hcd 0000:00:1a.7: new USB bus registered, assigned bus number 1
[   59.355679] ehci_hcd 0000:00:1a.7: debug port 1
[   59.355733] PCI: cache line size of 32 is not supported by device 0000:00:1a.7
[   59.355738] ehci_hcd 0000:00:1a.7: irq 18, io mem 0xe0625c00
[   59.359701] ehci_hcd 0000:00:1a.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004
[   59.359916] usb usb1: configuration #1 chosen from 1 choice
[   59.360043] hub 1-0:1.0: USB hub found
[   59.360100] hub 1-0:1.0: 4 ports detected
[   59.460812] ACPI: PCI Interrupt 0000:00:1d.7[A] -> GSI 23 (level, low) -> IRQ 23
[   59.460915] PCI: Setting latency timer of device 0000:00:1d.7 to 64
[   59.460940] ehci_hcd 0000:00:1d.7: EHCI Host Controller
[   59.461058] ehci_hcd 0000:00:1d.7: new USB bus registered, assigned bus number 2
[   59.461166] ehci_hcd 0000:00:1d.7: debug port 1
[   59.461227] PCI: cache line size of 32 is not supported by device 0000:00:1d.7
[   59.461232] ehci_hcd 0000:00:1d.7: irq 23, io mem 0xe0625800
[   59.465147] ehci_hcd 0000:00:1d.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004
[   59.465349] usb usb2: configuration #1 chosen from 1 choice
[   59.465475] hub 2-0:1.0: USB hub found
[   59.465531] hub 2-0:1.0: 6 ports detected
[   59.566560] USB Universal Host Controller Interface driver v3.0
[   59.566659] ACPI: PCI Interrupt 0000:00:1a.0[A] -> GSI 16 (level, low) -> IRQ 16
[   59.566756] PCI: Setting latency timer of device 0000:00:1a.0 to 64
[   59.566759] uhci_hcd 0000:00:1a.0: UHCI Host Controller
[   59.566879] uhci_hcd 0000:00:1a.0: new USB bus registered, assigned bus number 3
[   59.566973] uhci_hcd 0000:00:1a.0: irq 16, io base 0x000060c0
[   59.567170] usb usb3: configuration #1 chosen from 1 choice
[   59.567298] hub 3-0:1.0: USB hub found
[   59.567937] hub 3-0:1.0: 2 ports detected
[   59.668727] ACPI: PCI Interrupt 0000:00:1a.1[B] -> GSI 21 (level, low) -> IRQ 21
[   59.668825] PCI: Setting latency timer of device 0000:00:1a.1 to 64
[   59.668829] uhci_hcd 0000:00:1a.1: UHCI Host Controller
[   59.668968] uhci_hcd 0000:00:1a.1: new USB bus registered, assigned bus number 4
[   59.669063] uhci_hcd 0000:00:1a.1: irq 21, io base 0x000060a0
[   59.669258] usb usb4: configuration #1 chosen from 1 choice
[   59.669386] hub 4-0:1.0: USB hub found
[   59.669442] hub 4-0:1.0: 2 ports detected
[   59.770816] ACPI: PCI Interrupt 0000:00:1d.0[A] -> GSI 23 (level, low) -> IRQ 23
[   59.770915] PCI: Setting latency timer of device 0000:00:1d.0 to 64
[   59.770929] uhci_hcd 0000:00:1d.0: UHCI Host Controller
[   59.771061] uhci_hcd 0000:00:1d.0: new USB bus registered, assigned bus number 5
[   59.771144] uhci_hcd 0000:00:1d.0: irq 23, io base 0x00006080
[   59.771355] usb usb5: configuration #1 chosen from 1 choice
[   59.771481] hub 5-0:1.0: USB hub found
[   59.771537] hub 5-0:1.0: 2 ports detected
[   59.872869] ACPI: PCI Interrupt 0000:00:1d.1[B] -> GSI 19 (level, low) -> IRQ 19
[   59.872966] PCI: Setting latency timer of device 0000:00:1d.1 to 64
[   59.872970] uhci_hcd 0000:00:1d.1: UHCI Host Controller
[   59.873098] uhci_hcd 0000:00:1d.1: new USB bus registered, assigned bus number 6
[   59.873201] uhci_hcd 0000:00:1d.1: irq 19, io base 0x00006060
[   59.873394] usb usb6: configuration #1 chosen from 1 choice
[   59.873522] hub 6-0:1.0: USB hub found
[   59.873579] hub 6-0:1.0: 2 ports detected
[   59.974945] ACPI: PCI Interrupt 0000:00:1d.2[C] -> GSI 18 (level, low) -> IRQ 18
[   59.975042] PCI: Setting latency timer of device 0000:00:1d.2 to 64
[   59.975046] uhci_hcd 0000:00:1d.2: UHCI Host Controller
[   59.975185] uhci_hcd 0000:00:1d.2: new USB bus registered, assigned bus number 7
[   59.975288] uhci_hcd 0000:00:1d.2: irq 18, io base 0x00006040
[   59.975488] usb usb7: configuration #1 chosen from 1 choice
[   59.975613] hub 7-0:1.0: USB hub found
[   59.975670] hub 7-0:1.0: 2 ports detected
[   60.077046] Initializing USB Mass Storage driver...
[   60.080010] usb 5-1: new low speed USB device using uhci_hcd and address 2
[   60.244227] usb 5-1: configuration #1 chosen from 1 choice
[   60.247286] usbcore: registered new interface driver usb-storage
[   60.247337] USB Mass Storage support registered.
[   60.247477] PNP: PS/2 Controller [PNP0303:PS2K,PNP0f03:PS2M] at 0x60,0x64 irq 1,12
[   60.250401] serio: i8042 KBD port at 0x60,0x64 irq 1
[   60.250448] serio: i8042 AUX port at 0x60,0x64 irq 12
[   60.250547] mice: PS/2 mouse device common for all mice
[   60.250886] input: PC Speaker as /class/input/input0
[   60.250976] i2c /dev entries driver
[   60.251125] coretemp coretemp.0: Using undocumented features, absolute temperature might be wrong!
[   60.251289] coretemp coretemp.1: Using undocumented features, absolute temperature might be wrong!
[   60.251442] coretemp coretemp.2: Using undocumented features, absolute temperature might be wrong!
[   60.251595] coretemp coretemp.3: Using undocumented features, absolute temperature might be wrong!
[   60.251733] md: raid1 personality registered for level 1
[   60.268013] raid6: int64x1   2027 MB/s
[   60.285002] raid6: int64x2   2781 MB/s
[   60.301998] raid6: int64x4   3101 MB/s
[   60.318996] raid6: int64x8   1914 MB/s
[   60.335979] raid6: sse2x1    3824 MB/s
[   60.352979] raid6: sse2x2    4222 MB/s
[   60.369967] raid6: sse2x4    6777 MB/s
[   60.370029] raid6: using algorithm sse2x4 (6777 MB/s)
[   60.370091] md: raid6 personality registered for level 6
[   60.370148] md: raid5 personality registered for level 5
[   60.370216] md: raid4 personality registered for level 4
[   60.370262] raid5: automatically using best checksumming function: generic_sse
[   60.374963]    generic_sse:  8524.000 MB/sec
[   60.375006] raid5: using function: generic_sse (8524.000 MB/sec)
[   60.375269] usbcore: registered new interface driver hiddev
[   60.823087] hiddev96: USB HID v1.00 Device [        UPS] on usb-0000:00:1d.0-1
[   60.823210] usbcore: registered new interface driver usbhid
[   60.823260] drivers/hid/usbhid/hid-core.c: v2.6:USB HID core driver
[   60.823328] Advanced Linux Sound Architecture Driver Version 1.0.14 (Thu May 31 09:03:25 2007 UTC).
[   60.823664] ACPI: PCI Interrupt 0000:00:1b.0[A] -> GSI 22 (level, low) -> IRQ 22
[   60.823775] PCI: Setting latency timer of device 0000:00:1b.0 to 64
[   60.892415] input: ImPS/2 Generic Wheel Mouse as /class/input/input1
[   60.916271] input: AT Translated Set 2 keyboard as /class/input/input2
[   60.957332] ALSA device list:
[   60.957377]   #0: HDA Intel at 0xe0620000 irq 22
[   60.957429] u32 classifier
[   60.957482]     Actions configured 
[   60.957527] nf_conntrack version 0.5.0 (8192 buckets, 65536 max)
[   60.957781] ip_tables: (C) 2000-2006 Netfilter Core Team
[   60.957880] TCP cubic registered
[   60.957937] NET: Registered protocol family 1
[   60.957994] NET: Registered protocol family 17
[   60.958370] md: Autodetecting RAID arrays.
[   61.110568] md: autorun ...
[   61.110608] md: considering sdl1 ...
[   61.110657] md:  adding sdl1 ...
[   61.110699] md:  adding sdk1 ...
[   61.110764] md:  adding sdj1 ...
[   61.110811] md:  adding sdi1 ...
[   61.110855] md:  adding sdh1 ...
[   61.110910] md:  adding sdg1 ...
[   61.110962] md:  adding sdf1 ...
[   61.111007] md:  adding sde1 ...
[   61.111052] md:  adding sdd1 ...
[   61.111095] md:  adding sdc1 ...
[   61.111139] md: sdb3 has different UUID to sdl1
[   61.111186] md: sdb2 has different UUID to sdl1
[   61.111233] md: sdb1 has different UUID to sdl1
[   61.111279] md: sda3 has different UUID to sdl1
[   61.111326] md: sda2 has different UUID to sdl1
[   61.111374] md: sda1 has different UUID to sdl1
[   61.111489] md: created md3
[   61.111542] md: bind<sdc1>
[   61.111588] md: bind<sdd1>
[   61.111632] md: bind<sde1>
[   61.111678] md: bind<sdf1>
[   61.111724] md: bind<sdg1>
[   61.111770] md: bind<sdh1>
[   61.111841] md: bind<sdi1>
[   61.111886] md: bind<sdj1>
[   61.111932] md: bind<sdk1>
[   61.111999] md: bind<sdl1>
[   61.112045] md: running: <sdl1><sdk1><sdj1><sdi1><sdh1><sdg1><sdf1><sde1><sdd1><sdc1>
[   61.112350] raid5: device sdl1 operational as raid disk 2
[   61.112401] raid5: device sdk1 operational as raid disk 3
[   61.112448] raid5: device sdj1 operational as raid disk 0
[   61.112496] raid5: device sdi1 operational as raid disk 1
[   61.112543] raid5: device sdh1 operational as raid disk 6
[   61.112591] raid5: device sdg1 operational as raid disk 5
[   61.112639] raid5: device sdf1 operational as raid disk 4
[   61.112686] raid5: device sde1 operational as raid disk 7
[   61.112734] raid5: device sdd1 operational as raid disk 8
[   61.112782] raid5: device sdc1 operational as raid disk 9
[   61.113360] raid5: allocated 10562kB for md3
[   61.113406] raid5: raid level 5 set md3 active with 10 out of 10 devices, algorithm 2
[   61.113474] RAID5 conf printout:
[   61.113517]  --- rd:10 wd:10
[   61.113559]  disk 0, o:1, dev:sdj1
[   61.113602]  disk 1, o:1, dev:sdi1
[   61.113646]  disk 2, o:1, dev:sdl1
[   61.113687]  disk 3, o:1, dev:sdk1
[   61.113730]  disk 4, o:1, dev:sdf1
[   61.113774]  disk 5, o:1, dev:sdg1
[   61.113819]  disk 6, o:1, dev:sdh1
[   61.113862]  disk 7, o:1, dev:sde1
[   61.113926]  disk 8, o:1, dev:sdd1
[   61.113970]  disk 9, o:1, dev:sdc1
[   61.114083] md: considering sdb3 ...
[   61.114140] md:  adding sdb3 ...
[   61.114184] md: sdb2 has different UUID to sdb3
[   61.114230] md: sdb1 has different UUID to sdb3
[   61.114278] md:  adding sda3 ...
[   61.114320] md: sda2 has different UUID to sdb3
[   61.114366] md: sda1 has different UUID to sdb3
[   61.114482] md: created md2
[   61.114534] md: bind<sda3>
[   61.114577] md: bind<sdb3>
[   61.114624] md: running: <sdb3><sda3>
[   61.114765] raid1: raid set md2 active with 2 out of 2 mirrors
[   61.114873] md: considering sdb2 ...
[   61.114929] md:  adding sdb2 ...
[   61.114984] md: sdb1 has different UUID to sdb2
[   61.115040] md:  adding sda2 ...
[   61.115084] md: sda1 has different UUID to sdb2
[   61.115219] md: created md1
[   61.115271] md: bind<sda2>
[   61.115317] md: bind<sdb2>
[   61.115361] md: running: <sdb2><sda2>
[   61.115499] raid1: raid set md1 active with 2 out of 2 mirrors
[   61.115609] md: considering sdb1 ...
[   61.115665] md:  adding sdb1 ...
[   61.115710] md:  adding sda1 ...
[   61.115753] md: created md0
[   61.115794] md: bind<sda1>
[   61.115840] md: bind<sdb1>
[   61.115887] md: running: <sdb1><sda1>
[   61.116029] raid1: raid set md0 active with 2 out of 2 mirrors
[   61.116720] md: ... autorun DONE.
[   61.146509] UDF-fs: No VRS found
[   61.146710] Filesystem "md2": Disabling barriers, not supported by the underlying device
[   61.156442] XFS mounting filesystem md2
[   61.237815] Ending clean XFS mount for filesystem: md2
[   61.237835] VFS: Mounted root (xfs filesystem) readonly.
[   61.237927] Freeing unused kernel memory: 216k freed
[   63.224910] Adding 16787768k swap on /dev/md0.  Priority:-1 extents:1 across:16787768k
[   63.273053] Filesystem "md2": Disabling barriers, not supported by the underlying device
[   64.303771] kjournald starting.  Commit interval 5 seconds
[   64.309323] EXT3 FS on md1, internal journal
[   64.309396] EXT3-fs: mounted filesystem with ordered data mode.
[   64.310221] Filesystem "md3": Disabling barriers, not supported by the underlying device
[   64.310483] XFS mounting filesystem md3
[   64.563068] Ending clean XFS mount for filesystem: md3
[   82.790183] e1000: eth1: e1000_watchdog: NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX
[   84.590210] process `syslogd' is using obsolete setsockopt SO_BSDCOMPAT
[  106.449661] mtrr: no more MTRRs available

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:28             ` Jesse Barnes
  2007-06-06 22:31               ` Justin Piszcz
  2007-06-06 22:35               ` Justin Piszcz
@ 2007-06-06 22:37               ` Randy Dunlap
  2007-06-06 22:46                 ` Justin Piszcz
  2007-06-06 22:54                 ` Justin Piszcz
  2007-06-06 22:39               ` Justin Piszcz
  2007-06-06 22:57               ` Justin Piszcz
  4 siblings, 2 replies; 118+ messages in thread
From: Randy Dunlap @ 2007-06-06 22:37 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Justin Piszcz, Andi Kleen, linux-kernel, Eric W. Biederman

On Wed, 6 Jun 2007 15:28:43 -0700 Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
> > Nope, I booted with only netconsole= options.  I have a lot of HW in
> > the box and I guess the buffer is too small.  Not sure where to
> > change it in the kernel.  Looking..
> 
> It's called "kernel log buffer size" and it's in "General setup".

or you can just boot with "log_buf_len=256k" on the kernel boot line (e.g.)

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:28             ` Jesse Barnes
                                 ` (2 preceding siblings ...)
  2007-06-06 22:37               ` Randy Dunlap
@ 2007-06-06 22:39               ` Justin Piszcz
  2007-06-06 22:57               ` Justin Piszcz
  4 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:39 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>> Nope, I booted with only netconsole= options.  I have a lot of HW in
>> the box and I guess the buffer is too small.  Not sure where to
>> change it in the kernel.  Looking..
>
> It's called "kernel log buffer size" and it's in "General setup".
>
> Jesse
>

Seems stable & fast so far, except when it hit swap :)

top - 18:38:13 up 5 min,  6 users,  load average: 38.56, 12.65, 4.52
Tasks: 227 total,  64 running, 163 sleeping,   0 stopped,   0 zombie
Cpu0  :  0.0%us, 23.9%sy,  0.0%ni,  0.0%id, 72.7%wa,  0.0%hi,  3.4%si,  0.0%st
Cpu1  :  0.0%us, 23.5%sy,  0.0%ni,  1.0%id, 75.5%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu2  :  0.0%us, 24.1%sy,  0.0%ni,  0.0%id, 75.9%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu3  :  0.0%us, 23.6%sy,  0.0%ni,  0.0%id, 76.4%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   8039576k total,  7998988k used,    40588k free,       12k buffers
Swap: 16787768k total,   879324k used, 15908444k free,    16316k cached

Tested with
$ stress --vm 32 --vm-bytes 250M
and
$ stress --vm 64 --vm-bytes 250M

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
   248 root      10  -5     0    0    0 D   18  0.0   0:07.81 kswapd0
  3788 war       18   0  256m 115m  164 D    3  1.5   0:01.32 stress
  3800 war       18   0  256m 7300  160 R    3  0.1   0:00.58 stress
  3801 war       18   0  256m  34m  160 R    3  0.4   0:02.48 stress
  3808 war       18   0  256m 217m  164 R    3  2.8   0:01.16 stress
   559 root      10  -5     0    0    0 D    2  0.0   0:00.15 md0_raid1
  3771 war       18   0  256m 174m  164 R    2  2.2   0:01.30 stress
  3785 war       18   0  256m  30m  164 R    2  0.4   0:01.42 stress
  3813 war       18   0  256m  34m  164 R    2  0.4   0:01.06 stress
  3787 war       18   0  256m  24m  160 R    2  0.3   0:00.25 stress
  3794 war       18   0  256m  40m  160 R    2  0.5   0:00.14 stress
  3778 war       18   0  256m 109m  164 R    2  1.4   0:01.23 stress
  3783 war       18   0  256m 185m  164 R    2  2.4   0:01.29 stress
  3799 war       18   0  256m 172m  164 R    2  2.2   0:01.07 stress
  3816 war       18   0  256m 203m  160 R    2  2.6   0:00.39 stress
  3819 war       18   0  256m 144m  160 R    2  1.8   0:00.40 stress
  3820 war       18   0  256m 147m  160 R    2  1.9   0:00.39 stress
  3825 war       18   0  256m  91m  164 R    2  1.2   0:01.16 stress
  3777 war       18   0  256m  29m  164 R    1  0.4   0:01.05 stress
  3779 war       18   0  256m 197m  164 R    1  2.5   0:01.29 stress
  3805 war       18   0  256m 216m  160 R    1  2.8   0:00.36 stress
  3812 war       18   0  256m  30m  164 R    1  0.4   0:01.14 stress
  3830 war       18   0  256m 176m  164 R    1  2.3   0:01.17 stress
  3831 war       18   0  256m  64m  164 R    1  0.8   0:01.40 stress
  3769 war       18   0  256m  43m  164 R    1  0.6   0:01.23 stress
  3770 war       18   0  256m 169m  164 R    1  2.2   0:01.18 stress
  3773 war       18   0  256m  44m  160 R    1  0.6   0:00.29 stress
  3774 war       18   0  256m  21m  160 R    1  0.3   0:00.21 stress
  3780 war       18   0  256m 167m  164 R    1  2.1   0:01.33 stress
  3781 war       18   0  256m 143m  164 R    1  1.8   0:01.35 stress
  3798 war       18   0  256m 153m  164 R    1  1.9   0:01.16 stress
  3807 war       18   0  256m  31m  164 R    1  0.4   0:01.17 stress
  3817 war       18   0  256m 185m  160 R    1  2.4   0:00.40 stress
  3822 war       18   0  256m 200m  160 R    1  2.6   0:00.41 stress
  3824 war       18   0  256m 8684  160 R    1  0.1   0:00.16 stress
  3827 war       18   0  256m 103m  164 R    1  1.3   0:00.95 stress
  3828 war       18   0  256m 1976  160 R    1  0.0   0:00.19 stress
  2026 daemon    30  15 30580  436  372 D    1  0.0   0:02.10 bindgraph.pl
  3768 war       18   0  256m  81m  164 R    1  1.0   0:01.84 stress
  3772 war       18   0  256m 158m  164 R    1  2.0   0:01.11 stress
  3782 war       18   0  256m 180m  164 R    1  2.3   0:01.25 stress
  3786 war       18   0  256m 173m  164 R    1  2.2   0:01.13 stress
  3790 war       18   0  256m  20m  164 R    1  0.3   0:01.10 stress
  3791 war       18   0  256m  36m  164 R    1  0.5   0:01.80 stress
  3796 war       18   0  256m 111m  164 R    1  1.4   0:01.04 stress
  3797 war       18   0  256m 120m  164 R    1  1.5   0:01.12 stress
  3803 war       18   0  256m 129m  160 R    1  1.6   0:01.98 stress
  3804 war       18   0  256m 234m  160 R    1  3.0   0:00.40 stress
  3806 war       18   0  256m  49m  164 R    1  0.6   0:00.99 stress
  3809 war       18   0  256m  54m  164 R    1  0.7   0:01.16 stress
  3814 war       18   0  256m  38m  164 R    1  0.5   0:00.97 stress
  3815 war       18   0  256m  69m  164 R    1  0.9   0:01.06 stress
   246 root      15   0     0    0    0 D    0  0.0   0:00.01 pdflush
  3687 war       15   0 18012  896  560 R    0  0.0   0:00.88 top
  3775 war       18   0  256m 153m  164 R    0  2.0   0:01.14 stress
  3776 war       18   0  256m  55m  164 R    0  0.7   0:01.34 stress
  3784 war       18   0  256m  13m  164 R    0  0.2   0:00.96 stress
  3789 war       18   0  256m  46m  164 R    0  0.6   0:01.54 stress
  3792 war       18   0  256m  12m  164 R    0  0.2   0:01.00 stress
  3793 war       18   0  256m  44m  164 R    0  0.6   0:00.96 stress
  3795 war       18   0  256m 196m  164 R    0  2.5   0:01.06 stress
  3802 war       18   0  256m  22m  160 R    0  0.3   0:02.70 stress
  3810 war       18   0  256m  40m  164 R    0  0.5   0:01.06 stress
  3818 war       18   0  256m 146m  160 R    0  1.9   0:00.34 stress
  3829 war       18   0  256m 130m  164 R    0  1.7   0:01.11 stress
   247 root      15   0     0    0    0 D    0  0.0   0:00.01 pdflush
   554 root      10  -5     0    0    0 D    0  0.0   0:00.01 md2_raid1
  3811 war       18   0  256m  61m  160 R    0  0.8   0:05.85 stress
  3821 war       18   0  256m 136m  160 R    0  1.7   0:00.37 stress
  3823 war       18   0  256m 204m  160 R    0  2.6   0:00.34 stress
  3826 war       18   0  256m  95m  164 R    0  1.2   0:01.03 stress





^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:37               ` Randy Dunlap
@ 2007-06-06 22:46                 ` Justin Piszcz
  2007-06-06 22:54                 ` Justin Piszcz
  1 sibling, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:46 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Randy Dunlap wrote:

> On Wed, 6 Jun 2007 15:28:43 -0700 Jesse Barnes wrote:
>
>> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>>> Nope, I booted with only netconsole= options.  I have a lot of HW in
>>> the box and I guess the buffer is too small.  Not sure where to
>>> change it in the kernel.  Looking..
>>
>> It's called "kernel log buffer size" and it's in "General setup".
>
> or you can just boot with "log_buf_len=256k" on the kernel boot line (e.g.)
>
> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
>

Thanks, very useful note!

$ stress --vm 32 --vm-bytes 250M
stress: info: [4087] dispatching hogs: 0 cpu, 0 io, 32 vm, 0 hdd

Mem:   8039576k total,  5288036k used,  2751540k free,       12k buffers
Swap:        0k total,        0k used,        0k free,    37944k cached


$ stress --vm 40 --vm-bytes 250M
stress: info: [4120] dispatching hogs: 0 cpu, 0 io, 40 vm, 0 hdd
Mem:   8039576k total,  6661484k used,  1378092k free,       12k buffers
Swap:        0k total,        0k used,        0k free,    38000k cached

$ stress --vm 42 --vm-bytes 250M
stress: info: [4398] dispatching hogs: 0 cpu, 0 io, 42 vm, 0 hdd
Mem:   8039576k total,  6447048k used,  1592528k free,        8k buffers
Swap:        0k total,        0k used,        0k free,    11148k cached

$ stress --vm 45 --vm-bytes 250M
stress: info: [4352] dispatching hogs: 0 cpu, 0 io, 45 vm, 0 hdd
stress: FAIL: [4372] (494) hogvm malloc failed: Cannot allocate memory
stress: FAIL: [4352] (395) <-- worker 4372 returned error 1
stress: WARN: [4352] (397) now reaping child worker processes
stress: FAIL: [4352] (452) failed run completed in 4s

$ stress --vm 48 --vm-bytes 250M
stress: info: [4303] dispatching hogs: 0 cpu, 0 io, 48 vm, 0 hdd
stress: FAIL: [4323] (494) hogvm malloc failed: Cannot allocate memory
stress: FAIL: [4322] (494) hogvm malloc failed: Cannot allocate memory
stress: FAIL: [4303] (395) <-- worker 4323 returned error 1
stress: WARN: [4303] (397) now reaping child worker processes
stress: FAIL: [4303] (395) <-- worker 4322 returned error 1
stress: WARN: [4303] (397) now reaping child worker processes
stress: FAIL: [4303] (452) failed run completed in 4s

Is there a better way to verify I can use all the available memory?

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:37               ` Randy Dunlap
  2007-06-06 22:46                 ` Justin Piszcz
@ 2007-06-06 22:54                 ` Justin Piszcz
  2007-06-06 23:11                   ` Randy Dunlap
  1 sibling, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:54 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Randy Dunlap wrote:

> On Wed, 6 Jun 2007 15:28:43 -0700 Jesse Barnes wrote:
>
>> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>>> Nope, I booted with only netconsole= options.  I have a lot of HW in
>>> the box and I guess the buffer is too small.  Not sure where to
>>> change it in the kernel.  Looking..
>>
>> It's called "kernel log buffer size" and it's in "General setup".
>
> or you can just boot with "log_buf_len=256k" on the kernel boot line (e.g.)
>
> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
>

Hm, not sure if it was from the patch or what but I ran this:

1. swapoff -a
2. ./eatmem

The machine responded to ping and alt-sysrq-b but the box remain 
unresponsive, I guess the kernel did not kill the process? :(

The moments before it 'froze'

top - 18:48:01 up 15 min,  7 users,  load average: 6.61, 18.50, 13.31
Tasks: 200 total,  18 running, 182 sleeping,   0 stopped,   0 zombie
Cpu(s):  0.0%us, 90.7%sy,  0.0%ni,  5.9%id,  3.3%wa,  0.0%hi,  0.0%si, 
0.0%st
Mem:   8039576k total,  7998860k used,    40716k free,        8k buffers
Swap:        0k total,        0k used,        0k free,     1664k cached

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
   248 root      11  -5     0    0    0 R   85  0.0   0:16.05 kswapd0
  2265 nut       18   0 13320  244    4 R   40  0.0   0:03.13 newhidups
  2267 nut       18   0 12216  168    4 R   40  0.0   0:02.04 upsd
  2474 ntp       18   0 22192  400    8 R   39  0.0   0:02.00 ntpd
  3563 jpiszcz   18   0 41964 1264    4 R   38  0.0   0:02.20 pine
  3530 root      18   0 96240 3132   36 R   37  0.0   0:02.09 kdm_greet
  2052 root      18   0  6080  112    4 R   37  0.0   0:02.00 hald-addon-stor
  4479 war       17   0 18012  700  252 R   33  0.0   0:01.81 top
  4480 war       19   0 6948m 6.8g    4 R   22 88.4   0:05.81 eatmem
  2095 root      18   0 13128  216    8 R   10  0.0   0:00.50 dirmngr
  2545 root      18   0 95788 2488    4 R    5  0.0   0:00.25 apache2
  3564 war       18   0 41620  832    4 R    5  0.0   0:00.34 pine
  2270 nut       15   0 12212  144    4 S    1  0.0   0:00.05 upsmon
   561 root      10  -5     0    0    0 S    0  0.0   0:00.02 xfsbufd

Very simply program:

#include <iostream>
using namespace std;

int main()
{
   long int interations = 10000000;
   int counter = 1;

   for(counter;counter<interations;counter++)
   {
      double *d = new double[100];
   }

   return 0;
}

Any idea why the OOM killer can or does not kill it?

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:28             ` Jesse Barnes
                                 ` (3 preceding siblings ...)
  2007-06-06 22:39               ` Justin Piszcz
@ 2007-06-06 22:57               ` Justin Piszcz
  2007-06-06 23:20                 ` Jesse Barnes
  4 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 22:57 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>> Nope, I booted with only netconsole= options.  I have a lot of HW in
>> the box and I guess the buffer is too small.  Not sure where to
>> change it in the kernel.  Looking..
>
> It's called "kernel log buffer size" and it's in "General setup".
>
> Jesse
>

Did the dmesg output get you what you needed?  Why the few KB difference? 
:)

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:54                 ` Justin Piszcz
@ 2007-06-06 23:11                   ` Randy Dunlap
  2007-06-06 23:15                     ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Randy Dunlap @ 2007-06-06 23:11 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Eric W. Biederman

On Wed, 6 Jun 2007 18:54:37 -0400 (EDT) Justin Piszcz wrote:

> Hm, not sure if it was from the patch or what but I ran this:
> 
> 1. swapoff -a
> 2. ./eatmem
> 
> The machine responded to ping and alt-sysrq-b but the box remain 
> unresponsive, I guess the kernel did not kill the process? :(
> 
> The moments before it 'froze'
> 
> top - 18:48:01 up 15 min,  7 users,  load average: 6.61, 18.50, 13.31
> Tasks: 200 total,  18 running, 182 sleeping,   0 stopped,   0 zombie
> Cpu(s):  0.0%us, 90.7%sy,  0.0%ni,  5.9%id,  3.3%wa,  0.0%hi,  0.0%si, 
> 0.0%st
> Mem:   8039576k total,  7998860k used,    40716k free,        8k buffers
> Swap:        0k total,        0k used,        0k free,     1664k cached
> 
>    PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
>    248 root      11  -5     0    0    0 R   85  0.0   0:16.05 kswapd0
>   2265 nut       18   0 13320  244    4 R   40  0.0   0:03.13 newhidups
>   2267 nut       18   0 12216  168    4 R   40  0.0   0:02.04 upsd
>   2474 ntp       18   0 22192  400    8 R   39  0.0   0:02.00 ntpd
>   3563 jpiszcz   18   0 41964 1264    4 R   38  0.0   0:02.20 pine
>   3530 root      18   0 96240 3132   36 R   37  0.0   0:02.09 kdm_greet
>   2052 root      18   0  6080  112    4 R   37  0.0   0:02.00 hald-addon-stor
>   4479 war       17   0 18012  700  252 R   33  0.0   0:01.81 top
>   4480 war       19   0 6948m 6.8g    4 R   22 88.4   0:05.81 eatmem
>   2095 root      18   0 13128  216    8 R   10  0.0   0:00.50 dirmngr
>   2545 root      18   0 95788 2488    4 R    5  0.0   0:00.25 apache2
>   3564 war       18   0 41620  832    4 R    5  0.0   0:00.34 pine
>   2270 nut       15   0 12212  144    4 S    1  0.0   0:00.05 upsmon
>    561 root      10  -5     0    0    0 S    0  0.0   0:00.02 xfsbufd
> 
> Very simply program:
> 
> #include <iostream>
> using namespace std;
> 
> int main()
> {
>    long int interations = 10000000;
>    int counter = 1;
> 
>    for(counter;counter<interations;counter++)
>    {
>       double *d = new double[100];

You usually have to access the allocated memory, like:

	*d = 1.0;

for it to actually be allocated (AFAIK).

>    }
> 
>    return 0;
> }
> 
> Any idea why the OOM killer can or does not kill it?

What are the values of /proc/sys/vm/overcommit* ?

See Documentation/vm/overcommit-accounting .

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 23:11                   ` Randy Dunlap
@ 2007-06-06 23:15                     ` Justin Piszcz
  2007-06-06 23:34                       ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 23:15 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Randy Dunlap wrote:

> On Wed, 6 Jun 2007 18:54:37 -0400 (EDT) Justin Piszcz wrote:
>
>> Hm, not sure if it was from the patch or what but I ran this:
>>
>> 1. swapoff -a
>> 2. ./eatmem
>>
>
> You usually have to access the allocated memory, like:
>
> 	*d = 1.0;
>
> for it to actually be allocated (AFAIK).
>
>>    }
>>
>>    return 0;
>> }
>>
>> Any idea why the OOM killer can or does not kill it?
>
> What are the values of /proc/sys/vm/overcommit* ?
>
> See Documentation/vm/overcommit-accounting .

They should be the defaults as I do not change them:

p34:~# find /proc/|grep -i overcommit
/proc/sys/vm/overcommit_memory
/proc/sys/vm/overcommit_ratio
find: /proc/5128: No such file or directory
p34:~# cat /proc/sys/vm/overcommit_memory
0
p34:~# cat /proc/sys/vm/overcommit_ratio
50
p34:~#


Comments?

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 22:57               ` Justin Piszcz
@ 2007-06-06 23:20                 ` Jesse Barnes
  2007-06-06 23:24                   ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 23:20 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 3:57 pm Justin Piszcz wrote:
> On Wed, 6 Jun 2007, Jesse Barnes wrote:
> > On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
> >> Nope, I booted with only netconsole= options.  I have a lot of HW
> >> in the box and I guess the buffer is too small.  Not sure where to
> >> change it in the kernel.  Looking..
> >
> > It's called "kernel log buffer size" and it's in "General setup".
> >
> > Jesse
>
> Did the dmesg output get you what you needed?  Why the few KB
> difference?
>
> :)

Yeah, looked at your e820 and your MTRR settings and I think my patch is 
doing the right thing (i.e. trimming just the right amount of memory, 
leaving you with as much as possible).

The mem= approach though looks slightly off, but I haven't looked at 
x86_64's mem= handling to see why.  From a high level though, adjusting 
end_pfn is the right thing to do, since theoretically mem= could choose 
to make holes in your low memory and keep your high memory in the 
allocation pools (though it's not generally implemented this way).

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 23:20                 ` Jesse Barnes
@ 2007-06-06 23:24                   ` Justin Piszcz
  2007-06-06 23:27                     ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-06 23:24 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 3:57 pm Justin Piszcz wrote:
>> On Wed, 6 Jun 2007, Jesse Barnes wrote:
>>> On Wednesday, June 6, 2007 3:26 pm Justin Piszcz wrote:
>>>> Nope, I booted with only netconsole= options.  I have a lot of HW
>>>> in the box and I guess the buffer is too small.  Not sure where to
>>>> change it in the kernel.  Looking..
>>>
>>> It's called "kernel log buffer size" and it's in "General setup".
>>>
>>> Jesse
>>
>> Did the dmesg output get you what you needed?  Why the few KB
>> difference?
>>
>> :)
>
> Yeah, looked at your e820 and your MTRR settings and I think my patch is
> doing the right thing (i.e. trimming just the right amount of memory,
> leaving you with as much as possible).
>
> The mem= approach though looks slightly off, but I haven't looked at
> x86_64's mem= handling to see why.  From a high level though, adjusting
> end_pfn is the right thing to do, since theoretically mem= could choose
> to make holes in your low memory and keep your high memory in the
> allocation pools (though it's not generally implemented this way).
>
> Jesse
>

Ahh, ok!  Sounds great, I will keep running the kernel with your patch 
without mem= and let you know if I see any issues.

Chances of getting this into 2.6.22-rc5?

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 23:24                   ` Justin Piszcz
@ 2007-06-06 23:27                     ` Jesse Barnes
  2007-06-07  8:51                       ` Andi Kleen
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 23:27 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 4:24 pm Justin Piszcz wrote:
> > The mem= approach though looks slightly off, but I haven't looked
> > at x86_64's mem= handling to see why.  From a high level though,
> > adjusting end_pfn is the right thing to do, since theoretically
> > mem= could choose to make holes in your low memory and keep your
> > high memory in the allocation pools (though it's not generally
> > implemented this way).
> >
> > Jesse
>
> Ahh, ok!  Sounds great, I will keep running the kernel with your
> patch without mem= and let you know if I see any issues.
>
> Chances of getting this into 2.6.22-rc5?

I'm not sure it's appropriate for -rc5 since it mucks around with some 
early boot ordering, but I'll leave that to Andi, since it does address 
some real bugs people have been seeing.

Can we add your "Tested-by:  Justin Piszcz <jpiszcz@lucidpixels.com>" to 
the patch? :)

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 23:15                     ` Justin Piszcz
@ 2007-06-06 23:34                       ` Jesse Barnes
  2007-06-07  8:10                         ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-06 23:34 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Randy Dunlap, Andi Kleen, linux-kernel, Eric W. Biederman

On Wednesday, June 6, 2007 4:15 pm Justin Piszcz wrote:
> On Wed, 6 Jun 2007, Randy Dunlap wrote:
> > On Wed, 6 Jun 2007 18:54:37 -0400 (EDT) Justin Piszcz wrote:
> >> Hm, not sure if it was from the patch or what but I ran this:
> >>
> >> 1. swapoff -a
> >> 2. ./eatmem
> >
> > You usually have to access the allocated memory, like:
> >
> > 	*d = 1.0;
> >
> > for it to actually be allocated (AFAIK).
> >
> >>    }
> >>
> >>    return 0;
> >> }
> >>
> >> Any idea why the OOM killer can or does not kill it?
> >
> > What are the values of /proc/sys/vm/overcommit* ?
> >
> > See Documentation/vm/overcommit-accounting .
>
> They should be the defaults as I do not change them:
>
> p34:~# find /proc/|grep -i overcommit
> /proc/sys/vm/overcommit_memory
> /proc/sys/vm/overcommit_ratio
> find: /proc/5128: No such file or directory
> p34:~# cat /proc/sys/vm/overcommit_memory
> 0
> p34:~# cat /proc/sys/vm/overcommit_ratio
> 50
> p34:~#
>
>
> Comments?

You can be sure your memory is available if reported in /proc/meminfo or 
at boot, since those represent the actual kernel data structures used 
for memory allocation:

[    0.000000] On node 0 totalpages: 2061783

That corresponds to 2061783*4k = 8445063168 bytes or ~8053M.  Is that 
fairly close to what's actually installed in the machine?

Note that your boot also mentions this:

[  106.449661] mtrr: no more MTRRs available

which indicates that things like X may not be able to map the 
framebuffer with the 'write-combine' attribute, which will hurt 
performance.  I've heard reports that turning of 'Intel QST fan 
control' in your BIOS settings will prevent all your MTRRs from being 
used (improperly, probably another BIOS bug) so that X will perform 
well.  But if you don't use X on this machine, you don't have to worry 
about it.  The other option would be to remap your MTRRs by hand to 
free one up for X, you can do that by combining the last one or two 
entries into a single MTRR using the API described in 
Documentation/mtrr.txt before you start X.

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
  2007-06-06 20:26 ` Justin Piszcz
  2007-06-06 21:53 ` Justin Piszcz
@ 2007-06-07  7:45 ` Eric W. Biederman
  2007-06-07 17:30   ` Jesse Barnes
  2007-06-07  8:16 ` Andi Kleen
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 118+ messages in thread
From: Eric W. Biederman @ 2007-06-07  7:45 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz

Jesse Barnes <jesse.barnes@intel.com> writes:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> Justin, can you please test and make sure this patch works for
> you too?  It'll only work around the problem, but it's better
> than having to do mem= by hand or waiting for a fix from your
> BIOS vendor.

Ok.  Overall this feels good but a few nits below.
Would it make sense to split this into two patches.
The first to just do the cleanup that removes the allocations
for holding the mttr ranges?

> Thanks,
> Jesse
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c
> b/arch/i386/kernel/cpu/mtrr/generic.c
> index c4ebb51..71fc768 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
>  #include "mtrr.h"
>
>  
>  struct mtrr_state {
> -	struct mtrr_var_range *var_ranges;
> +	struct mtrr_var_range var_ranges[NUM_VAR_RANGES];

Could we name it MAX_VAR_RANGES and not NUM_VAR_RANGES.
In practices this is going to be 8 for every cpu I know of,
so calling this NUM_VAR_RANGES may be a little confusing.

>  	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
>  	unsigned char enabled;
>  	unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
>  	struct mtrr_var_range *vrs;
>  	unsigned lo, dummy;
>  
> -	if (!mtrr_state.var_ranges) {
> - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct
> mtrr_var_range),
> -						GFP_KERNEL);
> -		if (!mtrr_state.var_ranges)
> -			return;
> -	} 
>  	vrs = mtrr_state.var_ranges;
>  
>  	rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..d7922ce 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -12,7 +12,7 @@
>  #include "mtrr.h"
>  
>  /* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> +extern unsigned int usage_table[];
I think that should be:
> +extern unsigned int usage_table[NUM_VAR_RANGES];
Or even better yet the declaration moved to a header file.


>  
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations.  This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> +	unsigned long i, base, size, highest_addr = 0;
> +	mtrr_type type;
> +
> +	/* Find highest cached pfn */
> +	for (i = 0; i < num_var_ranges; i++) {
> +		mtrr_if->get(i, &base, &size, &type);
> +		if (type != MTRR_TYPE_WRBACK)
> +			continue;
> +		base <<= PAGE_SHIFT;
> +		size <<= PAGE_SHIFT;
> +		if (highest_addr < base + size)
> +			highest_addr = base + size;
> +	}

This looks like it will handle the common case, so I have no major objections
to this code.

At least in theory and possibly in practice there are a couple of corner
cases we have missed her.

- Overlapping MTRRs.
- What happens if we have uncached memory lower down?
  Except for performance problems I guess that case is relatively harmless.
- Is it possible and worth it to amend the e820 map, so it shows the
  problem area as Reserved or otherwise not usable RAM?


> +
> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;
> +	}
> +}
>  
>  /**
>   * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..a29dcba 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
>  #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>  
>  #define NUM_FIXED_RANGES 88
> +#define NUM_VAR_RANGES 256
MAX_VAR_RANGES?

>  #define MTRRfix64K_00000_MSR 0x250
>  #define MTRRfix16K_80000_MSR 0x258
>  #define MTRRfix16K_A0000_MSR 0x259

Eric

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 23:34                       ` Jesse Barnes
@ 2007-06-07  8:10                         ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-07  8:10 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Randy Dunlap, Andi Kleen, linux-kernel, Eric W. Biederman



On Wed, 6 Jun 2007, Jesse Barnes wrote:

> On Wednesday, June 6, 2007 4:15 pm Justin Piszcz wrote:
>> On Wed, 6 Jun 2007, Randy Dunlap wrote:
>>> On Wed, 6 Jun 2007 18:54:37 -0400 (EDT) Justin Piszcz wrote:
>>>> Hm, not sure if it was from the patch or what but I ran this:
>>>>
>>>> 1. swapoff -a
>>>> 2. ./eatmem
>>>
>>> You usually have to access the allocated memory, like:
>>>
>>> 	*d = 1.0;
>>>
>>> for it to actually be allocated (AFAIK).
>>>
>>>>    }
>>>>
>>>>    return 0;
>>>> }
>>>>
>>>> Any idea why the OOM killer can or does not kill it?
>>>
>>> What are the values of /proc/sys/vm/overcommit* ?
>>>
>>> See Documentation/vm/overcommit-accounting .
>>
>> They should be the defaults as I do not change them:
>>
>> p34:~# find /proc/|grep -i overcommit
>> /proc/sys/vm/overcommit_memory
>> /proc/sys/vm/overcommit_ratio
>> find: /proc/5128: No such file or directory
>> p34:~# cat /proc/sys/vm/overcommit_memory
>> 0
>> p34:~# cat /proc/sys/vm/overcommit_ratio
>> 50
>> p34:~#
>>
>>
>> Comments?
>
> You can be sure your memory is available if reported in /proc/meminfo or
> at boot, since those represent the actual kernel data structures used
> for memory allocation:
>
> [    0.000000] On node 0 totalpages: 2061783
>
> That corresponds to 2061783*4k = 8445063168 bytes or ~8053M.  Is that
> fairly close to what's actually installed in the machine?
>
> Note that your boot also mentions this:
>
> [  106.449661] mtrr: no more MTRRs available
>
> which indicates that things like X may not be able to map the
> framebuffer with the 'write-combine' attribute, which will hurt
> performance.  I've heard reports that turning of 'Intel QST fan
> control' in your BIOS settings will prevent all your MTRRs from being
> used (improperly, probably another BIOS bug) so that X will perform
> well.  But if you don't use X on this machine, you don't have to worry
> about it.  The other option would be to remap your MTRRs by hand to
> free one up for X, you can do that by combining the last one or two
> entries into a single MTRR using the API described in
> Documentation/mtrr.txt before you start X.
>
> Jesse
>

FYI--

[  106.449661] mtrr: no more MTRRs available

This has always occurred, even with mem=8832M setting.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
                   ` (2 preceding siblings ...)
  2007-06-07  7:45 ` Eric W. Biederman
@ 2007-06-07  8:16 ` Andi Kleen
  2007-06-07 17:35   ` Jesse Barnes
  2007-06-07 14:41 ` Pavel Machek
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 118+ messages in thread
From: Andi Kleen @ 2007-06-07  8:16 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Wed, Jun 06, 2007 at 12:29:23PM -0700, Jesse Barnes wrote:
> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).

In theory -- while not recommended -- a BIOS could also
use a default fallback MTRR for cached and use explicit MTRRs to 
map the non existing ranges uncached. Would it make sense to handle this case?

Right now if someone used a default WC MTRR to make the memory
cached you would clip all memory.

Perhaps a fail safe would be good -- always leave some
memory left over even if it looks wrong.

Should also probably have some command line option 
to disable the check in case something bad happens with it.

Another thing that might be sense to investigate in relationship
to this patch is large page mappings with MTRRs. iirc P4 and also K8
splits pages internally with MTRR boundaries and might have some other 
bad side effects. Should we use this as hints to use 4K pages
for the boundary areas?

-Andi


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 23:27                     ` Jesse Barnes
@ 2007-06-07  8:51                       ` Andi Kleen
  2007-06-07  8:53                         ` Justin Piszcz
                                           ` (2 more replies)
  0 siblings, 3 replies; 118+ messages in thread
From: Andi Kleen @ 2007-06-07  8:51 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Justin Piszcz, Andi Kleen, linux-kernel, Eric W. Biederman

On Wed, Jun 06, 2007 at 04:27:46PM -0700, Jesse Barnes wrote:
> On Wednesday, June 6, 2007 4:24 pm Justin Piszcz wrote:
> > > The mem= approach though looks slightly off, but I haven't looked
> > > at x86_64's mem= handling to see why.  From a high level though,
> > > adjusting end_pfn is the right thing to do, since theoretically
> > > mem= could choose to make holes in your low memory and keep your
> > > high memory in the allocation pools (though it's not generally
> > > implemented this way).
> > >
> > > Jesse
> >
> > Ahh, ok!  Sounds great, I will keep running the kernel with your
> > patch without mem= and let you know if I see any issues.
> >
> > Chances of getting this into 2.6.22-rc5?
> 
> I'm not sure it's appropriate for -rc5 since it mucks around with some 
> early boot ordering, but I'll leave that to Andi, since it does address 
> some real bugs people have been seeing.

I don't think the patch is suitable for merging at this time. Perhaps
if it survives some time in -mm* / 2.6.23* it could be backported
in a later 2.6.22 stable release. But right now it definitely
needs more testing and addressing of my review comments.

> Can we add your "Tested-by:  Justin Piszcz <jpiszcz@lucidpixels.com>" to 
> the patch? :)

All such headers are only for the trail of blame and do you want to blame
Justin if anything goes wrong? Perhaps it should rather have a 
Blame-to: <whoever wrote Justin/Jesse's BIOS> but that also wouldn't 
help without concrete contact points.

-ANdi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07  8:51                       ` Andi Kleen
@ 2007-06-07  8:53                         ` Justin Piszcz
  2007-06-07  9:55                         ` Satyam Sharma
  2007-06-07 17:33                         ` Jesse Barnes
  2 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-07  8:53 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Jesse Barnes, linux-kernel, Eric W. Biederman



On Thu, 7 Jun 2007, Andi Kleen wrote:

> On Wed, Jun 06, 2007 at 04:27:46PM -0700, Jesse Barnes wrote:
>> On Wednesday, June 6, 2007 4:24 pm Justin Piszcz wrote:
>>>> The mem= approach though looks slightly off, but I haven't looked
>>>> at x86_64's mem= handling to see why.  From a high level though,
>>>> adjusting end_pfn is the right thing to do, since theoretically
>>>> mem= could choose to make holes in your low memory and keep your
>>>> high memory in the allocation pools (though it's not generally
>>>> implemented this way).
>>>>
>>>> Jesse
>>>
>>> Ahh, ok!  Sounds great, I will keep running the kernel with your
>>> patch without mem= and let you know if I see any issues.
>>>
>>> Chances of getting this into 2.6.22-rc5?
>>
>> I'm not sure it's appropriate for -rc5 since it mucks around with some
>> early boot ordering, but I'll leave that to Andi, since it does address
>> some real bugs people have been seeing.
>
> I don't think the patch is suitable for merging at this time. Perhaps
> if it survives some time in -mm* / 2.6.23* it could be backported
> in a later 2.6.22 stable release. But right now it definitely
> needs more testing and addressing of my review comments.
>
>> Can we add your "Tested-by:  Justin Piszcz <jpiszcz@lucidpixels.com>" to
>> the patch? :)
>
> All such headers are only for the trail of blame and do you want to blame
> Justin if anything goes wrong? Perhaps it should rather have a
> Blame-to: <whoever wrote Justin/Jesse's BIOS> but that also wouldn't
> help without concrete contact points.
>
> -ANdi
>

Hah!  Again, I'll keep runnihg with Jesse's patch and as long as I can 
keep patching newer kernels I can continue to run with it.  So far, 
overnight with backups and the like, I have not noticed any problems. 
Also tested logging in to X/KDE, no issues. [yet]

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07  8:51                       ` Andi Kleen
  2007-06-07  8:53                         ` Justin Piszcz
@ 2007-06-07  9:55                         ` Satyam Sharma
  2007-06-07 17:33                         ` Jesse Barnes
  2 siblings, 0 replies; 118+ messages in thread
From: Satyam Sharma @ 2007-06-07  9:55 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jesse Barnes, Justin Piszcz, linux-kernel, Eric W. Biederman,
	Andrew Morton

On 6/7/07, Andi Kleen <andi@firstfloor.org> wrote:
> On Wed, Jun 06, 2007 at 04:27:46PM -0700, Jesse Barnes wrote:
> > [...]
> > I'm not sure it's appropriate for -rc5 since it mucks around with some
> > early boot ordering, but I'll leave that to Andi, since it does address
> > some real bugs people have been seeing.
>
> I don't think the patch is suitable for merging at this time. Perhaps
> if it survives some time in -mm* / 2.6.23* it could be backported
> in a later 2.6.22 stable release. But right now it definitely
> needs more testing and addressing of my review comments.

BTW an unrelated/happy side-effect of the patch is that it removes
the zero-size-guilty kmalloc()'s from the mtrr code. -mm does have
some fixes that prevent those zero-size allocations, which could
likely be retired if this gets merged in? ...

[ On another unrelated note, could someone tell me who maintains
i386? I've looked around, but surprisingly can't seem to find
anybody listed anywhere. ]

Satyam

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
                   ` (3 preceding siblings ...)
  2007-06-07  8:16 ` Andi Kleen
@ 2007-06-07 14:41 ` Pavel Machek
  2007-06-08  0:20 ` Andrew Morton
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 118+ messages in thread
From: Pavel Machek @ 2007-06-07 14:41 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

Hi!

> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.

Way too obnoxious warning, I'd say. Just drop the *s.

> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;

...and I'd print lost memory in kilobytes, to be more luser-friendly.

							Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07  7:45 ` Eric W. Biederman
@ 2007-06-07 17:30   ` Jesse Barnes
  2007-06-08 23:13     ` Eric W. Biederman
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-07 17:30 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Andi Kleen, linux-kernel, Justin Piszcz

On Thursday, June 7, 2007 12:45 am Eric W. Biederman wrote:
> Ok.  Overall this feels good but a few nits below.
> Would it make sense to split this into two patches.
> The first to just do the cleanup that removes the allocations
> for holding the mttr ranges?

I suppose we could split it, but it's small, and the only reason for 
removing the allocations was so that we could init it earlier.

> >  struct mtrr_state {
> > -	struct mtrr_var_range *var_ranges;
> > +	struct mtrr_var_range var_ranges[NUM_VAR_RANGES];
>
> Could we name it MAX_VAR_RANGES and not NUM_VAR_RANGES.
> In practices this is going to be 8 for every cpu I know of,
> so calling this NUM_VAR_RANGES may be a little confusing.

You're right, I should have kept the old name with MAX_ in it.  I'll fix 
it up.

> >  /* RED-PEN: this is accessed without any locking */
> > -extern unsigned int *usage_table;
> > +extern unsigned int usage_table[];
>
> I think that should be:
> > +extern unsigned int usage_table[NUM_VAR_RANGES];
>
> Or even better yet the declaration moved to a header file.

Oops, yeah, this should just be in mtrr.h.

> This looks like it will handle the common case, so I have no major
> objections to this code.
>
> At least in theory and possibly in practice there are a couple of
> corner cases we have missed her.
>
> - Overlapping MTRRs.

Overlapping should be ok, since that's usually intentional (e.g. one big 
wb range with a portion of uc space due to another mtrr).

> - What happens if we have uncached memory lower down?

Holes definitely aren't dealt with, but then we haven't seen any yet...

>   Except for performance problems I guess that case is relatively
> harmless. - Is it possible and worth it to amend the e820 map, so it
> shows the problem area as Reserved or otherwise not usable RAM?

That would be useful, but only if we moved the check to a little 
earlier, prior to the addition of the active ranges from the e820.  
Might be a little nicer than adjusting end_pfn, but will ultimately 
achieve the same thing...

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07  8:51                       ` Andi Kleen
  2007-06-07  8:53                         ` Justin Piszcz
  2007-06-07  9:55                         ` Satyam Sharma
@ 2007-06-07 17:33                         ` Jesse Barnes
  2 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-07 17:33 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Justin Piszcz, linux-kernel, Eric W. Biederman

On Thursday, June 7, 2007 1:51 am Andi Kleen wrote:
> On Wed, Jun 06, 2007 at 04:27:46PM -0700, Jesse Barnes wrote:
> > On Wednesday, June 6, 2007 4:24 pm Justin Piszcz wrote:
> > > > The mem= approach though looks slightly off, but I haven't
> > > > looked at x86_64's mem= handling to see why.  From a high level
> > > > though, adjusting end_pfn is the right thing to do, since
> > > > theoretically mem= could choose to make holes in your low
> > > > memory and keep your high memory in the allocation pools
> > > > (though it's not generally implemented this way).
> > > >
> > > > Jesse
> > >
> > > Ahh, ok!  Sounds great, I will keep running the kernel with your
> > > patch without mem= and let you know if I see any issues.
> > >
> > > Chances of getting this into 2.6.22-rc5?
> >
> > I'm not sure it's appropriate for -rc5 since it mucks around with
> > some early boot ordering, but I'll leave that to Andi, since it
> > does address some real bugs people have been seeing.
>
> I don't think the patch is suitable for merging at this time. Perhaps
> if it survives some time in -mm* / 2.6.23* it could be backported
> in a later 2.6.22 stable release. But right now it definitely
> needs more testing and addressing of my review comments.
>
> > Can we add your "Tested-by:  Justin Piszcz
> > <jpiszcz@lucidpixels.com>" to the patch? :)
>
> All such headers are only for the trail of blame and do you want to
> blame Justin if anything goes wrong? Perhaps it should rather have a
> Blame-to: <whoever wrote Justin/Jesse's BIOS> but that also wouldn't
> help without concrete contact points.

I think that header would be Lame-workaround-needed-because-of: 
<bad.bios.writer@foo.bar>. :)  The idea of tested-by is to give people 
a clue about who would be able to test any changes in the area 
affected.  So far from blaming Justin, it would give him credit for all 
his testing, and let people know that he might be able to test similar 
patches in the future.  I think it's worthwhile to track that...

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07  8:16 ` Andi Kleen
@ 2007-06-07 17:35   ` Jesse Barnes
  2007-06-07 17:40     ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-07 17:35 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, Justin Piszcz, Eric W. Biederman

On Thursday, June 7, 2007 1:16 am Andi Kleen wrote:
> On Wed, Jun 06, 2007 at 12:29:23PM -0700, Jesse Barnes wrote:
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached.  Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
>
> In theory -- while not recommended -- a BIOS could also
> use a default fallback MTRR for cached and use explicit MTRRs to
> map the non existing ranges uncached. Would it make sense to handle
> this case?

Probably.  I could just check the default memory type and bail out if 
it's cacheable.

> Should also probably have some command line option
> to disable the check in case something bad happens with it.

Sure.

> Another thing that might be sense to investigate in relationship
> to this patch is large page mappings with MTRRs. iirc P4 and also K8
> splits pages internally with MTRR boundaries and might have some
> other bad side effects. Should we use this as hints to use 4K pages
> for the boundary areas?

Or I could trim to the nearest large page boundary...  We'd lose a 
little more memory but it would keep things simple.

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 17:35   ` Jesse Barnes
@ 2007-06-07 17:40     ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-07 17:40 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On Thursday, June 7, 2007 1:16 am Andi Kleen wrote:
>> On Wed, Jun 06, 2007 at 12:29:23PM -0700, Jesse Barnes wrote:
>>> On some machines, buggy BIOSes don't properly setup WB MTRRs to
>>> cover all available RAM, meaning the last few megs (or even gigs)
>>> of memory will be marked uncached.  Since Linux tends to allocate
>>> from high memory addresses first, this causes the machine to be
>>> unusably slow as soon as the kernel starts really using memory
>>> (i.e. right around init time).
>>
>> In theory -- while not recommended -- a BIOS could also
>> use a default fallback MTRR for cached and use explicit MTRRs to
>> map the non existing ranges uncached. Would it make sense to handle
>> this case?
>
> Probably.  I could just check the default memory type and bail out if
> it's cacheable.
>
>> Should also probably have some command line option
>> to disable the check in case something bad happens with it.
>
> Sure.
>
>> Another thing that might be sense to investigate in relationship
>> to this patch is large page mappings with MTRRs. iirc P4 and also K8
>> splits pages internally with MTRR boundaries and might have some
>> other bad side effects. Should we use this as hints to use 4K pages
>> for the boundary areas?
>
> Or I could trim to the nearest large page boundary...  We'd lose a
> little more memory but it would keep things simple.
>
> Jesse
>

How much more memory are we going to lose?  Is mem= a better option if its 
going to keep decreasing?

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
                   ` (4 preceding siblings ...)
  2007-06-07 14:41 ` Pavel Machek
@ 2007-06-08  0:20 ` Andrew Morton
  2007-06-08  1:33   ` Jesse Barnes
  2007-06-08 21:15 ` Andrew Morton
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 118+ messages in thread
From: Andrew Morton @ 2007-06-08  0:20 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Wed, 6 Jun 2007 12:29:23 -0700
Jesse Barnes <jesse.barnes@intel.com> wrote:

> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -12,7 +12,7 @@
>  #include "mtrr.h"
>  
>  /* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> +extern unsigned int usage_table[];
>  
>  
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -47,7 +47,7 @@
>  
>  u32 num_var_ranges = 0;
>  
> -unsigned int *usage_table;
> +unsigned int usage_table[NUM_VAR_RANGES];
>  static DEFINE_MUTEX(mtrr_mutex);

didn't it feel all dirty when you had to do that?



From: Andrew Morton <akpm@linux-foundation.org>

- Move the declaration into a header file

- "usage_table" is a dumb name for an mtrr-specific kernel-wide identifier.

  There appear to beseveral other poorly-chosen identifiers in mtrr.

Cc: Andi Kleen <ak@suse.de>
Cc: Jesse Barnes <jesse.barnes@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/i386/kernel/cpu/mtrr/if.c   |    8 ++------
 arch/i386/kernel/cpu/mtrr/main.c |   17 +++++++++--------
 arch/i386/kernel/cpu/mtrr/mtrr.h |    2 ++
 3 files changed, 13 insertions(+), 14 deletions(-)

diff -puN arch/i386/kernel/cpu/mtrr/if.c~i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix arch/i386/kernel/cpu/mtrr/if.c
--- a/arch/i386/kernel/cpu/mtrr/if.c~i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix
+++ a/arch/i386/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
 #include <asm/mtrr.h>
 #include "mtrr.h"
 
-/* RED-PEN: this is accessed without any locking */
-extern unsigned int usage_table[];
-
-
 #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
 
 static const char *const mtrr_strings[MTRR_NUM_TYPES] =
@@ -396,7 +392,7 @@ static int mtrr_seq_show(struct seq_file
 	for (i = 0; i < max; i++) {
 		mtrr_if->get(i, &base, &size, &type);
 		if (size == 0)
-			usage_table[i] = 0;
+			mtrr_usage_table[i] = 0;
 		else {
 			if (size < (0x100000 >> PAGE_SHIFT)) {
 				/* less than 1MB */
@@ -410,7 +406,7 @@ static int mtrr_seq_show(struct seq_file
 			len += seq_printf(seq, 
 				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
-			     mtrr_attrib_to_str(type), usage_table[i]);
+			     mtrr_attrib_to_str(type), mtrr_usage_table[i]);
 		}
 	}
 	return 0;
diff -puN arch/i386/kernel/cpu/mtrr/main.c~i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix arch/i386/kernel/cpu/mtrr/main.c
--- a/arch/i386/kernel/cpu/mtrr/main.c~i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix
+++ a/arch/i386/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int usage_table[NUM_VAR_RANGES];
+unsigned int mtrr_usage_table[NUM_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -127,7 +127,7 @@ static void __init init_table(void)
 
 	max = num_var_ranges;
 	for (i = 0; i < max; i++)
-		usage_table[i] = 1;
+		mtrr_usage_table[i] = 1;
 }
 
 struct set_mtrr_data {
@@ -381,7 +381,7 @@ int mtrr_add_page(unsigned long base, un
 			goto out;
 		}
 		if (increment)
-			++usage_table[i];
+			++mtrr_usage_table[i];
 		error = i;
 		goto out;
 	}
@@ -390,12 +390,13 @@ int mtrr_add_page(unsigned long base, un
 	if (i >= 0) {
 		set_mtrr(i, base, size, type);
 		if (likely(replace < 0))
-			usage_table[i] = 1;
+			mtrr_usage_table[i] = 1;
 		else {
-			usage_table[i] = usage_table[replace] + !!increment;
+			mtrr_usage_table[i] = mtrr_usage_table[replace] +
+						!!increment;
 			if (unlikely(replace != i)) {
 				set_mtrr(replace, 0, 0, 0);
-				usage_table[replace] = 0;
+				mtrr_usage_table[replace] = 0;
 			}
 		}
 	} else
@@ -525,11 +526,11 @@ int mtrr_del_page(int reg, unsigned long
 		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
 		goto out;
 	}
-	if (usage_table[reg] < 1) {
+	if (mtrr_usage_table[reg] < 1) {
 		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
-	if (--usage_table[reg] < 1)
+	if (--mtrr_usage_table[reg] < 1)
 		set_mtrr(reg, 0, 0, 0);
 	error = reg;
  out:
diff -puN arch/i386/kernel/cpu/mtrr/mtrr.h~i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix arch/i386/kernel/cpu/mtrr/mtrr.h
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h~i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix
+++ a/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,5 @@ void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
 void mtrr_wrmsr(unsigned, unsigned, unsigned);
 
+/* RED-PEN: this is accessed without any locking */
+extern unsigned int mtrr_usage_table[];
_


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-08  0:20 ` Andrew Morton
@ 2007-06-08  1:33   ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-08  1:33 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Thursday, June 7, 2007 5:20:50 Andrew Morton wrote:
> > -unsigned int *usage_table;
> > +unsigned int usage_table[NUM_VAR_RANGES];
> >  static DEFINE_MUTEX(mtrr_mutex);
>
> didn't it feel all dirty when you had to do that?

Hey, this was already there... I didn't want to rewrite the whole thing at 
once. :)  Patch looks fine though.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
                   ` (5 preceding siblings ...)
  2007-06-08  0:20 ` Andrew Morton
@ 2007-06-08 21:15 ` Andrew Morton
  2007-06-08 21:28   ` Jesse Barnes
  2007-06-13  1:11 ` Eric W. Biederman
  2007-06-20 11:22 ` Helge Hafting
  8 siblings, 1 reply; 118+ messages in thread
From: Andrew Morton @ 2007-06-08 21:15 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Wed, 6 Jun 2007 12:29:23 -0700
Jesse Barnes <jesse.barnes@intel.com> wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
> 
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
> 
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.

i386 allmodconfig:

arch/i386/kernel/cpu/mtrr/main.c: In function 'mtrr_trim_uncached_memory':
arch/i386/kernel/cpu/mtrr/main.c:655: error: 'end_pfn' undeclared (first use in this function)
arch/i386/kernel/cpu/mtrr/main.c:655: error: (Each undeclared identifier is reported only once
arch/i386/kernel/cpu/mtrr/main.c:655: error: for each function it appears in.)

I'll poke some ifdefs in there.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-08 21:15 ` Andrew Morton
@ 2007-06-08 21:28   ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-08 21:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Friday, June 8, 2007 2:15:00 Andrew Morton wrote:
> > Something similar could be done on i386 if needed, but the boot
> > ordering would be slightly different, since the MTRR code on i386
> > depends on the boot_cpu_data structure being setup.
>
> i386 allmodconfig:
>
> arch/i386/kernel/cpu/mtrr/main.c: In function
> 'mtrr_trim_uncached_memory': arch/i386/kernel/cpu/mtrr/main.c:655:
> error: 'end_pfn' undeclared (first use in this function)
> arch/i386/kernel/cpu/mtrr/main.c:655: error: (Each undeclared
> identifier is reported only once
> arch/i386/kernel/cpu/mtrr/main.c:655: error: for each function it
> appears in.)
>
> I'll poke some ifdefs in there.

Oops, sorry about that, the code is only called on x86_64 but compiled 
even for i386...

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 17:30   ` Jesse Barnes
@ 2007-06-08 23:13     ` Eric W. Biederman
  2007-06-12 15:39       ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Eric W. Biederman @ 2007-06-08 23:13 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz

Jesse Barnes <jesse.barnes@intel.com> writes:


>> - Overlapping MTRRs.
>
> Overlapping should be ok, since that's usually intentional (e.g. one big 
> wb range with a portion of uc space due to another mtrr).

I'm not say overlapping was a bug.  I was saying that you don't handle
overlapping mtrrs in figuring the last cached addresses.  Therefore
when a UC range overlaps a WB range we might thing the last page
in the WB range is cached when it is not.

>> - What happens if we have uncached memory lower down?
>
> Holes definitely aren't dealt with, but then we haven't seen any yet...
>
>>   Except for performance problems I guess that case is relatively
>> harmless. - Is it possible and worth it to amend the e820 map, so it
>> shows the problem area as Reserved or otherwise not usable RAM?
>
> That would be useful, but only if we moved the check to a little 
> earlier, prior to the addition of the active ranges from the e820.  
> Might be a little nicer than adjusting end_pfn, but will ultimately 
> achieve the same thing...

Yes, with perhaps a little more consistency.

Eric

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-08 23:13     ` Eric W. Biederman
@ 2007-06-12 15:39       ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-12 15:39 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Andi Kleen, linux-kernel, Justin Piszcz

On Friday, June 8, 2007 4:13:22 Eric W. Biederman wrote:
> Jesse Barnes <jesse.barnes@intel.com> writes:
> >> - Overlapping MTRRs.
> >
> > Overlapping should be ok, since that's usually intentional (e.g.
> > one big wb range with a portion of uc space due to another mtrr).
>
> I'm not say overlapping was a bug.  I was saying that you don't
> handle overlapping mtrrs in figuring the last cached addresses. 
> Therefore when a UC range overlaps a WB range we might thing the last
> page in the WB range is cached when it is not.

Oh right, that might be an issue, but we haven't seen it yet...

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
                   ` (6 preceding siblings ...)
  2007-06-08 21:15 ` Andrew Morton
@ 2007-06-13  1:11 ` Eric W. Biederman
  2007-06-13  2:29   ` Jesse Barnes
  2007-06-20 11:22 ` Helge Hafting
  8 siblings, 1 reply; 118+ messages in thread
From: Eric W. Biederman @ 2007-06-13  1:11 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz

Jesse Barnes <jesse.barnes@intel.com> writes:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.

A quick update.  This patch is horribly incorrect on a socket F 
opteron/Athlon 64 with memory above 4GB.

In particular those cpus are capable of mapping all of memory
above 4GB as write back without using a single MTRR.

So examining MTRRs is insufficient.

Eric

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-13  1:11 ` Eric W. Biederman
@ 2007-06-13  2:29   ` Jesse Barnes
  2007-06-13 22:19     ` Eric W. Biederman
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-13  2:29 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz

On Tuesday, June 12, 2007 6:11:21 Eric W. Biederman wrote:
> Jesse Barnes <jesse.barnes@intel.com> writes:
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached.  Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > This patch works around the problem by scanning the MTRRs at
> > boot and figuring out whether the current end_pfn value (setup
> > by early e820 code) goes beyond the highest WB MTRR range, and
> > if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> > is printed too, letting the user know that not all of their
> > memory is available due to a likely BIOS bug.
>
> A quick update.  This patch is horribly incorrect on a socket F
> opteron/Athlon 64 with memory above 4GB.
>
> In particular those cpus are capable of mapping all of memory
> above 4GB as write back without using a single MTRR.
>
> So examining MTRRs is insufficient.

Hm, yuck.  What do you suggest?  Should we only run this check when Intel 
chips are present?  Checking only the bottom 4G isn't sufficient since we've 
seen platforms that have issues above that range...

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-13  2:29   ` Jesse Barnes
@ 2007-06-13 22:19     ` Eric W. Biederman
  0 siblings, 0 replies; 118+ messages in thread
From: Eric W. Biederman @ 2007-06-13 22:19 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz

Jesse Barnes <jbarnes@virtuousgeek.org> writes:

> On Tuesday, June 12, 2007 6:11:21 Eric W. Biederman wrote:
>> Jesse Barnes <jesse.barnes@intel.com> writes:
>> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
>> > cover all available RAM, meaning the last few megs (or even gigs)
>> > of memory will be marked uncached.  Since Linux tends to allocate
>> > from high memory addresses first, this causes the machine to be
>> > unusably slow as soon as the kernel starts really using memory
>> > (i.e. right around init time).
>> >
>> > This patch works around the problem by scanning the MTRRs at
>> > boot and figuring out whether the current end_pfn value (setup
>> > by early e820 code) goes beyond the highest WB MTRR range, and
>> > if so, trimming it to match.  A fairly obnoxious KERN_WARNING
>> > is printed too, letting the user know that not all of their
>> > memory is available due to a likely BIOS bug.
>>
>> A quick update.  This patch is horribly incorrect on a socket F
>> opteron/Athlon 64 with memory above 4GB.
>>
>> In particular those cpus are capable of mapping all of memory
>> above 4GB as write back without using a single MTRR.
>>
>> So examining MTRRs is insufficient.
>
> Hm, yuck.  What do you suggest?  Should we only run this check when Intel 
> chips are present?  Checking only the bottom 4G isn't sufficient since we've 
> seen platforms that have issues above that range...

My gut feel says that we need to call a function that is potentially cpu specific,
older AMD cpus and Intel cpus can just use the generic mtrr code.

I would also suggest we build a list of ranges of write-back memory.  Which
until we handle overlapping MTRRs in the generic MTRR case is just the write-back
MTRRs.

Then we get the data in a linux specific form we can check the linux specific
data structure against the e820 map.

I don't think that is going to much harder and it allows for creative cpu
designers.

Although this suggests that we want to worry about all memory holes as
well.  Because I have seen at least one system which failed to cover
the lower 4G with MTRRs.  While everything above 4G was fine.

Eric


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
                   ` (7 preceding siblings ...)
  2007-06-13  1:11 ` Eric W. Biederman
@ 2007-06-20 11:22 ` Helge Hafting
  2007-06-20 14:37   ` Andi Kleen
  8 siblings, 1 reply; 118+ messages in thread
From: Helge Hafting @ 2007-06-20 11:22 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

Jesse Barnes wrote:
> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>   
I assume this cannot be fixed by the simple approach
of echoing some useful numbers into /proc/mtrr like
we used to do for video memory? (Before X did this
automatically?)

An extra bootscript seems better than loosing memory.


Helge Hafting

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-20 11:22 ` Helge Hafting
@ 2007-06-20 14:37   ` Andi Kleen
  0 siblings, 0 replies; 118+ messages in thread
From: Andi Kleen @ 2007-06-20 14:37 UTC (permalink / raw)
  To: Helge Hafting
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

> I assume this cannot be fixed by the simple approach
> of echoing some useful numbers into /proc/mtrr like
> we used to do for video memory? (Before X did this
> automatically?)
> 
> An extra bootscript seems better than loosing memory.

In some cases it probably can, in other cases not because the memory
controller is misconfigured or not the right bits are set 
in the PCI bridges to enable DAC IO or ... or ...

There are also definite limits on how much quirks can do 
to fix this -- Linux is a generic kernel, not a BIOS replacement,
and can never be as intimate with the current setup
as the BIOS is.

It's definitely far safer to not use the memory. You're 
running in a situation never tested or considered by the 
motherboard vendor and everything is possible.

-Andi


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-07-05 12:12 ` Pavel Machek
@ 2007-07-05 12:16   ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-07-05 12:16 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Jesse Barnes, linux-kernel, akpm, Andi Kleen, Eric W. Biederman,
	Yinghai Lu



On Thu, 5 Jul 2007, Pavel Machek wrote:

> Hi!
>
>> On some machines, buggy BIOSes don't properly setup WB MTRRs to
>> cover all available RAM, meaning the last few megs (or even gigs)
>> of memory will be marked uncached.  Since Linux tends to allocate
>> from high memory addresses first, this causes the machine to be
>> unusably slow as soon as the kernel starts really using memory
>> (i.e. right around init time).
>>
>> This patch works around the problem by scanning the MTRRs at
>> boot and figuring out whether the current end_pfn value (setup
>> by early e820 code) goes beyond the highest WB MTRR range, and
>> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
>> is printed too, letting the user know that not all of their
>> memory is available due to a likely BIOS bug.
>>
>> Something similar could be done on i386 if needed, but the boot
>> ordering would be slightly different, since the MTRR code on i386
>> depends on the boot_cpu_data structure being setup.
>>
>> This patch fixes a bug in the last patch that caused the code to
>> run on non-Intel machines (AMD machines apparently don't need it
>> and it's untested on other non-Intel machines, so best keep it
>> off).
>>
>> akpm -- this one should replace all the mtrr patches currently
>> in your tree.
>>
>> Yinghai, maybe you can test this on one of your AMD machines to
>> make sure I got the CPU code right?
>
>> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
>> +		printk(KERN_WARNING "***************\n");
>> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
>> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
>> +		       "memory, trimmed %ld pages\n", end_pfn -
>> +		       (highest_addr >> PAGE_SHIFT));
>> +		printk(KERN_WARNING "***************\n");
>> +		end_pfn = highest_addr >> PAGE_SHIFT;
>
> I'd say using that many stars for KERN_WARNING printk is sign of
> mental illness or something...
> 							Pavel
> -- 
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
>

Will this patch make it into 2.6.23?

Been patching manually for a while with each -rc for 2.6.22..

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 21:34 Jesse Barnes
                   ` (3 preceding siblings ...)
  2007-06-27 10:44 ` Pim Zandbergen
@ 2007-07-05 12:12 ` Pavel Machek
  2007-07-05 12:16   ` Justin Piszcz
  4 siblings, 1 reply; 118+ messages in thread
From: Pavel Machek @ 2007-07-05 12:12 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: linux-kernel, akpm, Andi Kleen, Justin Piszcz, Eric W. Biederman,
	Yinghai Lu

Hi!

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
> 
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
> 
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
> 
> This patch fixes a bug in the last patch that caused the code to
> run on non-Intel machines (AMD machines apparently don't need it
> and it's untested on other non-Intel machines, so best keep it
> off).
> 
> akpm -- this one should replace all the mtrr patches currently
> in your tree.
> 
> Yinghai, maybe you can test this on one of your AMD machines to
> make sure I got the CPU code right?

> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;

I'd say using that many stars for KERN_WARNING printk is sign of
mental illness or something...
							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 17:06             ` Jesse Barnes
@ 2007-06-27 17:17               ` Pim Zandbergen
  0 siblings, 0 replies; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-27 17:17 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Mauro Giachero, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

Jesse Barnes wrote:

> Yeah, that's what I needed.  end_pfn looks ok, but I guess my test is a 
> little too precise.  It should be if ((highest_addr >> PAGE_SHIFT) < 
> end_pfn) rather than !=. 

Right. That made the message disappear. Nice to know my BIOS is really 
fixed.

Thanks,
Pim

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 17:02           ` Pim Zandbergen
@ 2007-06-27 17:06             ` Jesse Barnes
  2007-06-27 17:17               ` Pim Zandbergen
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-27 17:06 UTC (permalink / raw)
  To: Pim Zandbergen
  Cc: Mauro Giachero, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On Wednesday, June 27, 2007 10:02:35 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > It looks like end_pfn might be ~0UL now... can you print that out
> > in your configuration?
>
> Er, do you need the value of end_pfn ?
> Here's what I changed:
>
> if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
>          printk(KERN_WARNING "***************\n");
>          printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
>          printk(KERN_WARNING "**** MTRRs don't cover all of "
>                 "memory, trimmed %lu pages\n", end_pfn -
>                 (highest_addr >> PAGE_SHIFT));
>          printk(KERN_WARNING "**** end_pfn before = %lu\n", end_pfn);
>          end_pfn = highest_addr >> PAGE_SHIFT;
>          printk(KERN_WARNING "**** end_pfn after = %lu\n", end_pfn);
>          printk(KERN_WARNING "***************\n");
> }
>
> Here's the result:
> ***************
> **** WARNING: likely BIOS bug
> **** MTRRs don't cover all of memory, trimmed 18446744073709486080
> pages **** end_pfn before = 2293760
> **** end_pfn after = 2359296
> ***************
>
> Hope that's what you needed.

Yeah, that's what I needed.  end_pfn looks ok, but I guess my test is a 
little too precise.  It should be if ((highest_addr >> PAGE_SHIFT) < 
end_pfn) rather than !=.  That should keep it from trying to extend 
your memory with bogus math.  I thought that the two values should 
always match, but I guess slightly different memory configurations 
might break that assumption.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 16:22         ` Jesse Barnes
@ 2007-06-27 17:02           ` Pim Zandbergen
  2007-06-27 17:06             ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-27 17:02 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Mauro Giachero, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

Jesse Barnes wrote:

> It looks like end_pfn might be ~0UL now... can you print that out in 
> your configuration?

Er, do you need the value of end_pfn ?
Here's what I changed:

if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
         printk(KERN_WARNING "***************\n");
         printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
         printk(KERN_WARNING "**** MTRRs don't cover all of "
                "memory, trimmed %lu pages\n", end_pfn -
                (highest_addr >> PAGE_SHIFT));
         printk(KERN_WARNING "**** end_pfn before = %lu\n", end_pfn);
         end_pfn = highest_addr >> PAGE_SHIFT;
         printk(KERN_WARNING "**** end_pfn after = %lu\n", end_pfn);
         printk(KERN_WARNING "***************\n");
}

Here's the result:
***************
**** WARNING: likely BIOS bug
**** MTRRs don't cover all of memory, trimmed 18446744073709486080 pages
**** end_pfn before = 2293760
**** end_pfn after = 2359296
***************

Hope that's what you needed.
Pim


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 16:00       ` Pim Zandbergen
  2007-06-27 16:07         ` Jesse Barnes
@ 2007-06-27 16:22         ` Jesse Barnes
  2007-06-27 17:02           ` Pim Zandbergen
  1 sibling, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-27 16:22 UTC (permalink / raw)
  To: Pim Zandbergen
  Cc: Mauro Giachero, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On Wednesday, June 27, 2007 9:00:33 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > Yeah, you're right I should use an unsigned format string.  Pim, if
> > you change it to %lu does the printk in your dmesg look better?
>
> Er, no.
>
> **** MTRRs don't cover all of memory, trimmed 18446744073709486080
> pages

It looks like end_pfn might be ~0UL now... can you print that out in 
your configuration?

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 16:00       ` Pim Zandbergen
@ 2007-06-27 16:07         ` Jesse Barnes
  2007-06-27 16:22         ` Jesse Barnes
  1 sibling, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-27 16:07 UTC (permalink / raw)
  To: Pim Zandbergen
  Cc: Mauro Giachero, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On Wednesday, June 27, 2007 9:00:33 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > Yeah, you're right I should use an unsigned format string.  Pim, if
> > you change it to %lu does the printk in your dmesg look better?
>
> Er, no.
>
> **** MTRRs don't cover all of memory, trimmed 18446744073709486080
> pages

Hm, that's what I was afraid of.  So something else is wrong. :)

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 15:04     ` Jesse Barnes
@ 2007-06-27 16:00       ` Pim Zandbergen
  2007-06-27 16:07         ` Jesse Barnes
  2007-06-27 16:22         ` Jesse Barnes
  0 siblings, 2 replies; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-27 16:00 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Mauro Giachero, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

Jesse Barnes wrote:

> Yeah, you're right I should use an unsigned format string.  Pim, if you 
> change it to %lu does the printk in your dmesg look better?

Er, no.

**** MTRRs don't cover all of memory, trimmed 18446744073709486080 pages

Thanks,
Pim

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 14:22   ` Mauro Giachero
@ 2007-06-27 15:04     ` Jesse Barnes
  2007-06-27 16:00       ` Pim Zandbergen
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-27 15:04 UTC (permalink / raw)
  To: Mauro Giachero
  Cc: Pim Zandbergen, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On Wednesday, June 27, 2007 7:22:24 Mauro Giachero wrote:
> On 6/27/07, Pim Zandbergen <P.Zandbergen@macroscoop.nl> wrote:
> > Now:
> > Jesse released a new patch and I tried if for fun on 2.6.22-rc6
> > It looks like the patch is releasing memory rather than trimming
> > it:
> >
> > [...]
> > Jun 27 12:22:56 corneille kernel: **** MTRRs don't cover all of
> > memory, trimmed -65536 pages
> > [...]
>
> From Jesse's patch:
> > +       unsigned long i, base, size, highest_addr = 0, def, dummy;
> > [...]
> > +               printk(KERN_WARNING "**** MTRRs don't cover all of
> > " +                      "memory, trimmed %ld pages\n", end_pfn - +
> >                      (highest_addr >> PAGE_SHIFT));
>
> Since both end_pfn (from arch/x86_64/kernel/e820.c) and highest_addr
> are unsigned long, maybe the problem is just that %ld in the kprintf
> format string? Shouldn't that be %lu?

Yeah, you're right I should use an unsigned format string.  Pim, if you 
change it to %lu does the printk in your dmesg look better?

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 10:44 ` Pim Zandbergen
  2007-06-27 11:22   ` Andi Kleen
@ 2007-06-27 14:22   ` Mauro Giachero
  2007-06-27 15:04     ` Jesse Barnes
  1 sibling, 1 reply; 118+ messages in thread
From: Mauro Giachero @ 2007-06-27 14:22 UTC (permalink / raw)
  To: Pim Zandbergen
  Cc: Jesse Barnes, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On 6/27/07, Pim Zandbergen <P.Zandbergen@macroscoop.nl> wrote:
> Now:
> Jesse released a new patch and I tried if for fun on 2.6.22-rc6
> It looks like the patch is releasing memory rather than trimming it:
>
> [...]
> Jun 27 12:22:56 corneille kernel: **** MTRRs don't cover all of memory,
> trimmed -65536 pages
> [...]

>From Jesse's patch:
> +       unsigned long i, base, size, highest_addr = 0, def, dummy;
> [...]
> +               printk(KERN_WARNING "**** MTRRs don't cover all of "
> +                      "memory, trimmed %ld pages\n", end_pfn -
> +                      (highest_addr >> PAGE_SHIFT));

Since both end_pfn (from arch/x86_64/kernel/e820.c) and highest_addr
are unsigned long, maybe the problem is just that %ld in the kprintf
format string? Shouldn't that be %lu?

Mauro

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 11:40     ` Pim Zandbergen
@ 2007-06-27 11:44       ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-27 11:44 UTC (permalink / raw)
  To: Pim Zandbergen
  Cc: Andi Kleen, Jesse Barnes, linux-kernel, akpm, Eric W. Biederman,
	Yinghai Lu



On Wed, 27 Jun 2007, Pim Zandbergen wrote:

> Andi Kleen wrote:
>
>> That's impossible. Either it limited your RAM or it didn't change anything.
>
> OK, maybe it's cosmetic, but I would not expect a negative number
>
> With old BIOS it printed
> **** MTRRs don't cover all of memory, trimmed 196608 pages
>
> with new BIOS it prints
> **** MTRRs don't cover all of memory, trimmed -65536 pages
>
> Pim
>

It works ok for me:

[    0.000000] **** MTRRs don't cover all of memory, trimmed 16384 pages

This is using his latest patch with 2.6.22-rc6.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 11:22   ` Andi Kleen
@ 2007-06-27 11:40     ` Pim Zandbergen
  2007-06-27 11:44       ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-27 11:40 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jesse Barnes, linux-kernel, akpm, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

Andi Kleen wrote:

> That's impossible. Either it limited your RAM or it didn't change anything.

OK, maybe it's cosmetic, but I would not expect a negative number

With old BIOS it printed
**** MTRRs don't cover all of memory, trimmed 196608 pages

with new BIOS it prints
**** MTRRs don't cover all of memory, trimmed -65536 pages

Pim

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-27 10:44 ` Pim Zandbergen
@ 2007-06-27 11:22   ` Andi Kleen
  2007-06-27 11:40     ` Pim Zandbergen
  2007-06-27 14:22   ` Mauro Giachero
  1 sibling, 1 reply; 118+ messages in thread
From: Andi Kleen @ 2007-06-27 11:22 UTC (permalink / raw)
  To: Pim Zandbergen
  Cc: Jesse Barnes, linux-kernel, akpm, Andi Kleen, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On Wed, Jun 27, 2007 at 12:44:01PM +0200, Pim Zandbergen wrote:
> Jesse saved my life by releasing a patch that made my GigaByte Intel G33
> based motherboard use all of its 8GB RAM and not be slow as hell.

That's impossible. Either it limited your RAM or it didn't change anything.

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 21:34 Jesse Barnes
                   ` (2 preceding siblings ...)
  2007-06-25 23:34 ` Andi Kleen
@ 2007-06-27 10:44 ` Pim Zandbergen
  2007-06-27 11:22   ` Andi Kleen
  2007-06-27 14:22   ` Mauro Giachero
  2007-07-05 12:12 ` Pavel Machek
  4 siblings, 2 replies; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-27 10:44 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: linux-kernel, akpm, Andi Kleen, Justin Piszcz, Eric W. Biederman,
	Yinghai Lu

First:
Jesse saved my life by releasing a patch that made my GigaByte Intel G33
based motherboard use all of its 8GB RAM and not be slow as hell.

Then:
GigaByte released a BIOS update that fixed the root of the problem.
I went back from patched vanilla kernel to "official" Fedora kernel.

Now:
Jesse released a new patch and I tried if for fun on 2.6.22-rc6
It looks like the patch is releasing memory rather than trimming it:

Jun 27 12:22:56 corneille kernel: Linux version 2.6.22-rc6 
(pim@corneille) (gcc version 4.1.2 20070502 (Red Hat 4.1.2-12)) #1 SMP 
Wed Jun 27 11:41:29 CEST 2007
Jun 27 12:22:56 corneille kernel: Command line: root=LABEL=/ 
console=ttyS0,38400
Jun 27 12:22:56 corneille kernel: BIOS-provided physical RAM map:
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 0000000000000000 - 
000000000009e800 (usable)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 000000000009f800 - 
00000000000a0000 (reserved)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 00000000000f0000 - 
0000000000100000 (reserved)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 0000000000100000 - 
00000000cf5e0000 (usable)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 00000000cf5e0000 - 
00000000cf5e3000 (ACPI NVS)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 00000000cf5e3000 - 
00000000cf5f0000 (ACPI data)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 00000000cf5f0000 - 
00000000cf600000 (reserved)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 00000000d0000000 - 
00000000e0000000 (reserved)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 00000000fec00000 - 
0000000100000000 (reserved)
Jun 27 12:22:56 corneille kernel:  BIOS-e820: 0000000100000000 - 
0000000230000000 (usable)
Jun 27 12:22:56 corneille kernel: end_pfn_map = 2293760
Jun 27 12:22:56 corneille kernel: ***************
Jun 27 12:22:56 corneille kernel: **** WARNING: likely BIOS bug
Jun 27 12:22:56 corneille kernel: **** MTRRs don't cover all of memory, 
trimmed -65536 pages
Jun 27 12:22:56 corneille kernel: ***************

I tried the earlier patch as well and it gives the same result.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 17:38           ` Andi Kleen
@ 2007-06-26 18:55             ` Yinghai Lu
  0 siblings, 0 replies; 118+ messages in thread
From: Yinghai Lu @ 2007-06-26 18:55 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Eric W. Biederman, Jesse Barnes, linux-kernel, akpm, Justin Piszcz

On 6/26/07, Andi Kleen <andi@firstfloor.org> wrote:
> RevE had a new memory remapping scheme (memory hoisting) at least, which
> I think you refered to earlier.  There might have been more changes in F.

yes, REV E has hardware memory remapping.
Rev F you don't need to use mtrr to set MEM to WE above 4G.

YH

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 16:06         ` Eric W. Biederman
@ 2007-06-26 17:38           ` Andi Kleen
  2007-06-26 18:55             ` Yinghai Lu
  0 siblings, 1 reply; 118+ messages in thread
From: Andi Kleen @ 2007-06-26 17:38 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andi Kleen, Jesse Barnes, linux-kernel, akpm, Justin Piszcz, Yinghai Lu

On Tue, Jun 26, 2007 at 10:06:41AM -0600, Eric W. Biederman wrote:
> Andi Kleen <andi@firstfloor.org> writes:
> 
> >> For the K7 and K8 cores AMD systems are exactly like Intel systems
> >> with respect to MTRRs (although AMD systems also have additional registers)
> >> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
> >
> > It's called K8RevE, not K9
> 
> revF not revE.  I think AMD was code-naming that K9 before the socket F
> part was released.

I didn't think so.

> 
> revE was the last DDR rev of the K8 core.

RevE had a new memory remapping scheme (memory hoisting) at least, which
I think you refered to earlier.  There might have been more changes in F.

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 15:39       ` Andi Kleen
  2007-06-26 15:54         ` Yinghai Lu
@ 2007-06-26 16:06         ` Eric W. Biederman
  2007-06-26 17:38           ` Andi Kleen
  1 sibling, 1 reply; 118+ messages in thread
From: Eric W. Biederman @ 2007-06-26 16:06 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Jesse Barnes, linux-kernel, akpm, Justin Piszcz, Yinghai Lu

Andi Kleen <andi@firstfloor.org> writes:

>> For the K7 and K8 cores AMD systems are exactly like Intel systems
>> with respect to MTRRs (although AMD systems also have additional registers)
>> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
>
> It's called K8RevE, not K9

revF not revE.  I think AMD was code-naming that K9 before the socket F
part was released.

revE was the last DDR rev of the K8 core.

>> is an additional mechanism that makes everything above 4G write-back
>> cacheable without using any MTRRs.
>
> ... but not BIOS use this mechanism (often there are BIOS switches
> for several MTRR models or it is just the wrong one hardcoded), so Linux 
> should detect the broken cases.

Yes. They are and I have seen at least two motherboards that fit this
description.  I almost freaked out looking at a system with 16G and
only 4G setup to be cached.

The painful bit is I have also seen such a system with not all of the
lower 4G cached.  Which caused some interesting booting issues.

Eric

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 15:39       ` Andi Kleen
@ 2007-06-26 15:54         ` Yinghai Lu
  2007-06-26 16:06         ` Eric W. Biederman
  1 sibling, 0 replies; 118+ messages in thread
From: Yinghai Lu @ 2007-06-26 15:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Eric W. Biederman, Jesse Barnes, linux-kernel, akpm, Justin Piszcz

On 6/26/07, Andi Kleen <andi@firstfloor.org> wrote:
> > For the K7 and K8 cores AMD systems are exactly like Intel systems
> > with respect to MTRRs (although AMD systems also have additional registers)
> > For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
>
> It's called K8RevE, not K9

For K8 Rev F and later, if you are using TOM2, cpu will assume the mem
between 4G and TOM2 is WB.

I think rule could be:
scan the var mtrrs to find out if there is any mtrr is used for 4G
above is set to WB, if it is true, will use trimming tricks.

YH

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26  0:54     ` Eric W. Biederman
  2007-06-26  3:29       ` Jesse Barnes
@ 2007-06-26 15:39       ` Andi Kleen
  2007-06-26 15:54         ` Yinghai Lu
  2007-06-26 16:06         ` Eric W. Biederman
  1 sibling, 2 replies; 118+ messages in thread
From: Andi Kleen @ 2007-06-26 15:39 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, akpm, Justin Piszcz, Yinghai Lu

> For the K7 and K8 cores AMD systems are exactly like Intel systems
> with respect to MTRRs (although AMD systems also have additional registers)
> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there

It's called K8RevE, not K9

> is an additional mechanism that makes everything above 4G write-back
> cacheable without using any MTRRs.

... but not BIOS use this mechanism (often there are BIOS switches
for several MTRR models or it is just the wrong one hardcoded), so Linux 
should detect the broken cases.

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 15:02     ` Andi Kleen
@ 2007-06-26 15:38       ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-26 15:38 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jesse Barnes, linux-kernel, akpm, Justin Piszcz,
	Eric W. Biederman, Yinghai Lu

On Tuesday, June 26, 2007 8:02:49 am Andi Kleen wrote:
> On Mon, Jun 25, 2007 at 04:36:52PM -0700, Jesse Barnes wrote:
> > On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> > > > This patch fixes a bug in the last patch that caused the code to
> > > > run on non-Intel machines (AMD machines apparently don't need it
> > >
> > > Actually the problem can happen on AMD too, but the symptoms can
> > > be different and there can be more wrong than just the MTRRs.
> >
> > I should have been more specific in the changelog.  My understanding is
> > that AMD systems don't need it for memory above 4G, and since the code
>
> AMD systems need MTRRs to get cached memory too, or what is your point?

My point is that yes, this problem can happen on AMD, but the code doesn't 
handle the problems that AMD systems might have, since it doesn't look for 
problems in low memory (e.g. if you have an AMD system with 6G of memory, the 
code will probably trim everything above 4G since it doesn't know about the 
new AMD mapping stuff from RevE), as Eric pointed out.

Both you and Eric say you've seen AMD systems with problems, but handling them 
would make the code more complex than it is now, and I haven't seen the 
actual reports (memory maps & MTRR setups) so I can't really fix them anyway.  
And since this patch solves real problems as-is, it seems like we should push 
it upstream then rework it, if necessary (i.e. real user machines with this 
problem) later.

What do you think?

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 15:07             ` Jesse Barnes
@ 2007-06-26 15:18               ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-26 15:18 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Eric W. Biederman, Jesse Barnes, linux-kernel, akpm,
	Justin Piszcz, Yinghai Lu

On Tuesday, June 26, 2007 8:07:45 am Jesse Barnes wrote:
> On Tuesday, June 26, 2007 8:03:48 am Andi Kleen wrote:
> > On Mon, Jun 25, 2007 at 08:30:52PM -0700, Jesse Barnes wrote:
> > > Oh, and FYI I've seen new systems with a default mapping type of WB,
> > > with a few uncached holes for MMIO.  That means PAT will be absolutely
> > > required on those systems if we want to use WC for the framebuffer or
> > > other memory.
> >
> > Why? As long as there are still MTRRs free you could use them for
> > WC even in this setup.
>
> According to the manual, you can't use WC or WT MTRRs on top of a WB MTRR
> range.  Only UC MTRRs can override WB MTRRs, but PAT doesn't have this
> restriction.

Actually, it looks like WT on top of WB is ok, but not WC on top of WB, 
quoting from the manual:

> 1. If the physical address falls within the first 1 MByte of physical
> memory and fixed MTRRs are enabled, the processor uses the memory type
> stored for the appropriate fixed-range MTRR.
>
> 2. Otherwise, the processor attempts to match the physical address with a
> memory type set by the variable-range MTRRs:
>
>     a. If one variable memory range matches, the processor uses the memory
>        type stored in the IA32_MTRR_PHYSBASEn register for that range.
>     b. If two or more variable memory ranges match and the memory types are
>        identical, then that memory type is used.
>     c. If two or more variable memory ranges match and one of the memory
>        types is UC, the UC memory type used.
>     d. If two or more variable memory ranges match and the memory types are
>        WT and WB, the WT memory type is used.
>     e. For overlaps not defined by the above rules, processor behavior is
>        undefined.
>
> 3. If no fixed or variable memory range matches, the processor uses the
> default memory type.


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26 15:03           ` Andi Kleen
@ 2007-06-26 15:07             ` Jesse Barnes
  2007-06-26 15:18               ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-26 15:07 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Eric W. Biederman, Jesse Barnes, linux-kernel, akpm,
	Justin Piszcz, Yinghai Lu

On Tuesday, June 26, 2007 8:03:48 am Andi Kleen wrote:
> On Mon, Jun 25, 2007 at 08:30:52PM -0700, Jesse Barnes wrote:
> > Oh, and FYI I've seen new systems with a default mapping type of WB, with
> > a few uncached holes for MMIO.  That means PAT will be absolutely
> > required on those systems if we want to use WC for the framebuffer or
> > other memory.
>
> Why? As long as there are still MTRRs free you could use them for
> WC even in this setup.

According to the manual, you can't use WC or WT MTRRs on top of a WB MTRR 
range.  Only UC MTRRs can override WB MTRRs, but PAT doesn't have this 
restriction.

Jesse


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26  3:30         ` Jesse Barnes
@ 2007-06-26 15:03           ` Andi Kleen
  2007-06-26 15:07             ` Jesse Barnes
  0 siblings, 1 reply; 118+ messages in thread
From: Andi Kleen @ 2007-06-26 15:03 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Eric W. Biederman, Jesse Barnes, Andi Kleen, linux-kernel, akpm,
	Justin Piszcz, Yinghai Lu

On Mon, Jun 25, 2007 at 08:30:52PM -0700, Jesse Barnes wrote:
> Oh, and FYI I've seen new systems with a default mapping type of WB, with a 
> few uncached holes for MMIO.  That means PAT will be absolutely required on 
> those systems if we want to use WC for the framebuffer or other memory. 

Why? As long as there are still MTRRs free you could use them for 
WC even in this setup.

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 23:36   ` Jesse Barnes
  2007-06-26  0:54     ` Eric W. Biederman
@ 2007-06-26 15:02     ` Andi Kleen
  2007-06-26 15:38       ` Jesse Barnes
  1 sibling, 1 reply; 118+ messages in thread
From: Andi Kleen @ 2007-06-26 15:02 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Andi Kleen, linux-kernel, akpm, Justin Piszcz, Eric W. Biederman,
	Yinghai Lu

On Mon, Jun 25, 2007 at 04:36:52PM -0700, Jesse Barnes wrote:
> On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> > > This patch fixes a bug in the last patch that caused the code to
> > > run on non-Intel machines (AMD machines apparently don't need it
> >
> > Actually the problem can happen on AMD too, but the symptoms can
> > be different and there can be more wrong than just the MTRRs.
> 
> I should have been more specific in the changelog.  My understanding is 
> that AMD systems don't need it for memory above 4G, and since the code 

AMD systems need MTRRs to get cached memory too, or what is your point?

There was a new memory remapping feature in RevE but I didn't think
it obsoleted MTRRs.

We've also had systems with this issue.

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26  3:29       ` Jesse Barnes
@ 2007-06-26  3:30         ` Jesse Barnes
  2007-06-26 15:03           ` Andi Kleen
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-26  3:30 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, akpm, Justin Piszcz, Yinghai Lu

On Monday, June 25, 2007 8:29:35 pm Jesse Barnes wrote:
> Is there an is_cpu() check to differentiate between those?  Anyway I'd
> rather not enable it unless we see reports though...  So far I've only seen
> reports of this problem on some recent Intel based systems.

Oh, and FYI I've seen new systems with a default mapping type of WB, with a 
few uncached holes for MMIO.  That means PAT will be absolutely required on 
those systems if we want to use WC for the framebuffer or other memory.  I 
hope it's ready for mainline soon...

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-26  0:54     ` Eric W. Biederman
@ 2007-06-26  3:29       ` Jesse Barnes
  2007-06-26  3:30         ` Jesse Barnes
  2007-06-26 15:39       ` Andi Kleen
  1 sibling, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-26  3:29 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, akpm, Justin Piszcz, Yinghai Lu

On Monday, June 25, 2007 5:54:49 pm Eric W. Biederman wrote:
> Jesse Barnes <jesse.barnes@intel.com> writes:
> > On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> >> > This patch fixes a bug in the last patch that caused the code to
> >> > run on non-Intel machines (AMD machines apparently don't need it
> >>
> >> Actually the problem can happen on AMD too, but the symptoms can
> >> be different and there can be more wrong than just the MTRRs.
> >
> > I should have been more specific in the changelog.  My understanding is
> > that AMD systems don't need it for memory above 4G, and since the code
> > doesn't handle holes (no test systems, nor any real reports that I've
> > seen), it's not that useful for finding problems below 4G.  We can
> > always change that later if needed though.
>
> For the K7 and K8 cores AMD systems are exactly like Intel systems
> with respect to MTRRs (although AMD systems also have additional registers)
> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
> is an additional mechanism that makes everything above 4G write-back
> cacheable without using any MTRRs.
>
> So only on the very latest AMD cpus would this code not be applicable.

Is there an is_cpu() check to differentiate between those?  Anyway I'd rather 
not enable it unless we see reports though...  So far I've only seen reports 
of this problem on some recent Intel based systems.

Jesse



^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 23:36   ` Jesse Barnes
@ 2007-06-26  0:54     ` Eric W. Biederman
  2007-06-26  3:29       ` Jesse Barnes
  2007-06-26 15:39       ` Andi Kleen
  2007-06-26 15:02     ` Andi Kleen
  1 sibling, 2 replies; 118+ messages in thread
From: Eric W. Biederman @ 2007-06-26  0:54 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, akpm, Justin Piszcz, Yinghai Lu

Jesse Barnes <jesse.barnes@intel.com> writes:

> On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
>> > This patch fixes a bug in the last patch that caused the code to
>> > run on non-Intel machines (AMD machines apparently don't need it
>>
>> Actually the problem can happen on AMD too, but the symptoms can
>> be different and there can be more wrong than just the MTRRs.
>
> I should have been more specific in the changelog.  My understanding is 
> that AMD systems don't need it for memory above 4G, and since the code 
> doesn't handle holes (no test systems, nor any real reports that I've 
> seen), it's not that useful for finding problems below 4G.  We can 
> always change that later if needed though.

For the K7 and K8 cores AMD systems are exactly like Intel systems
with respect to MTRRs (although AMD systems also have additional registers)
For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
is an additional mechanism that makes everything above 4G write-back
cacheable without using any MTRRs.

So only on the very latest AMD cpus would this code not be applicable.

Eric

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 23:34 ` Andi Kleen
@ 2007-06-25 23:36   ` Jesse Barnes
  2007-06-26  0:54     ` Eric W. Biederman
  2007-06-26 15:02     ` Andi Kleen
  0 siblings, 2 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-25 23:36 UTC (permalink / raw)
  To: Andi Kleen
  Cc: linux-kernel, akpm, Justin Piszcz, Eric W. Biederman, Yinghai Lu

On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> > This patch fixes a bug in the last patch that caused the code to
> > run on non-Intel machines (AMD machines apparently don't need it
>
> Actually the problem can happen on AMD too, but the symptoms can
> be different and there can be more wrong than just the MTRRs.

I should have been more specific in the changelog.  My understanding is 
that AMD systems don't need it for memory above 4G, and since the code 
doesn't handle holes (no test systems, nor any real reports that I've 
seen), it's not that useful for finding problems below 4G.  We can 
always change that later if needed though.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 21:34 Jesse Barnes
  2007-06-25 21:45 ` Justin Piszcz
  2007-06-25 22:01 ` Andrew Morton
@ 2007-06-25 23:34 ` Andi Kleen
  2007-06-25 23:36   ` Jesse Barnes
  2007-06-27 10:44 ` Pim Zandbergen
  2007-07-05 12:12 ` Pavel Machek
  4 siblings, 1 reply; 118+ messages in thread
From: Andi Kleen @ 2007-06-25 23:34 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: linux-kernel, akpm, Andi Kleen, Justin Piszcz, Eric W. Biederman,
	Yinghai Lu

> This patch fixes a bug in the last patch that caused the code to
> run on non-Intel machines (AMD machines apparently don't need it

Actually the problem can happen on AMD too, but the symptoms can
be different and there can be more wrong than just the MTRRs.

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 22:05   ` Jesse Barnes
@ 2007-06-25 22:29     ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-25 22:29 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Andrew Morton, linux-kernel, Andi Kleen, Eric W. Biederman, Yinghai Lu



On Mon, 25 Jun 2007, Jesse Barnes wrote:

> On Monday, June 25, 2007 3:01:27 Andrew Morton wrote:
>> On Mon, 25 Jun 2007 14:34:42 -0700
>>
>> Jesse Barnes <jesse.barnes@intel.com> wrote:
>>> akpm -- this one should replace all the mtrr patches currently
>>> in your tree.
>>
>> fear, uncertainty, doubt.
>>
>> box:/usr/src/25> grep mtrr series
>> x86_64-mm-bug-in-i386-mtrr-initialization.patch
>> x86-fix-section-mismatch-warnings-in-mtrr.patch
>> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs.patch
>> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix.patch
>> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix-2.patch
>> i386-mtrr-clean-up-usage_table.patch
>>
>> Not all of those, I'm sure.  I think you're referring to the final
>> four.  I guess if it doesn't compile (again) I'll find out ;)
>
> Oops, correct.  I forgot you were carrying some MTRR patches not
> authored by me.  :)  Yeah, the last four are the ones I meant.
>
> Thanks,
> Jesse
>

Hi Jesse,

Tested your patch against 2.6.22-rc6, working good so far!

$ uname -r
2.6.22-rc6

Mem:   8039576k total,  1056500k used,  6983076k free,     2420k buffers
Swap: 16787768k total,        0k used, 16787768k free,   127128k cached

Justin.


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 22:01 ` Andrew Morton
@ 2007-06-25 22:05   ` Jesse Barnes
  2007-06-25 22:29     ` Justin Piszcz
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-25 22:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Andi Kleen, Justin Piszcz, Eric W. Biederman, Yinghai Lu

On Monday, June 25, 2007 3:01:27 Andrew Morton wrote:
> On Mon, 25 Jun 2007 14:34:42 -0700
>
> Jesse Barnes <jesse.barnes@intel.com> wrote:
> > akpm -- this one should replace all the mtrr patches currently
> > in your tree.
>
> fear, uncertainty, doubt.
>
> box:/usr/src/25> grep mtrr series
> x86_64-mm-bug-in-i386-mtrr-initialization.patch
> x86-fix-section-mismatch-warnings-in-mtrr.patch
> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs.patch
> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix.patch
> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix-2.patch
> i386-mtrr-clean-up-usage_table.patch
>
> Not all of those, I'm sure.  I think you're referring to the final
> four.  I guess if it doesn't compile (again) I'll find out ;)

Oops, correct.  I forgot you were carrying some MTRR patches not 
authored by me.  :)  Yeah, the last four are the ones I meant.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 21:34 Jesse Barnes
  2007-06-25 21:45 ` Justin Piszcz
@ 2007-06-25 22:01 ` Andrew Morton
  2007-06-25 22:05   ` Jesse Barnes
  2007-06-25 23:34 ` Andi Kleen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 118+ messages in thread
From: Andrew Morton @ 2007-06-25 22:01 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: linux-kernel, Andi Kleen, Justin Piszcz, Eric W. Biederman, Yinghai Lu

On Mon, 25 Jun 2007 14:34:42 -0700
Jesse Barnes <jesse.barnes@intel.com> wrote:

> akpm -- this one should replace all the mtrr patches currently
> in your tree.

fear, uncertainty, doubt.

box:/usr/src/25> grep mtrr series 
x86_64-mm-bug-in-i386-mtrr-initialization.patch
x86-fix-section-mismatch-warnings-in-mtrr.patch
i386-x86_64-trim-memory-not-covered-by-wb-mtrrs.patch
i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix.patch
i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix-2.patch
i386-mtrr-clean-up-usage_table.patch

Not all of those, I'm sure.  I think you're referring to the final
four.  I guess if it doesn't compile (again) I'll find out ;)

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 21:34 Jesse Barnes
@ 2007-06-25 21:45 ` Justin Piszcz
  2007-06-25 22:01 ` Andrew Morton
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-25 21:45 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: linux-kernel, akpm, Andi Kleen, Eric W. Biederman, Yinghai Lu

Will try this patch shortly w/ 2.6.22-rc6.

p34:/usr/src/linux-2.6.22-rc6# patch -p1 < ../mtrr-v3.patch
patching file Documentation/kernel-parameters.txt
patching file arch/i386/kernel/cpu/mtrr/generic.c
patching file arch/i386/kernel/cpu/mtrr/if.c
patching file arch/i386/kernel/cpu/mtrr/main.c
patching file arch/i386/kernel/cpu/mtrr/mtrr.h
patching file arch/x86_64/kernel/bugs.c
patching file arch/x86_64/kernel/setup.c
patching file include/asm-x86_64/mtrr.h
p34:/usr/src/linux-2.6.22-rc6#

So far so good.

On Mon, 25 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch fixes a bug in the last patch that caused the code to
> run on non-Intel machines (AMD machines apparently don't need it
> and it's untested on other non-Intel machines, so best keep it
> off).
>
> akpm -- this one should replace all the mtrr patches currently
> in your tree.
>
> Yinghai, maybe you can test this on one of your AMD machines to
> make sure I got the CPU code right?
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> Thanks,
> Jesse
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 5d0283c..642db9b 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
> 			See drivers/char/README.epca and
> 			Documentation/digiepca.txt.
>
> +	disable_mtrr_trim [X86-64, Intel only]
> +			By default the kernel will trim any uncacheable
> +			memory out of your available memory pool based on
> +			MTRR settings.  This parameter disables that behavior,
> +			possibly causing your machine to run very slowly.
> +
> 	dmascc=		[HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
> 			support available.
> 			Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
> index 6d59378..bf70d2d 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
> #include "mtrr.h"
>
> struct mtrr_state {
> -	struct mtrr_var_range *var_ranges;
> +	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
> 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
> 	unsigned char enabled;
> 	unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
> 	struct mtrr_var_range *vrs;
> 	unsigned lo, dummy;
>
> -	if (!mtrr_state.var_ranges) {
> -		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
> -						GFP_KERNEL);
> -		if (!mtrr_state.var_ranges)
> -			return;
> -	}
> 	vrs = mtrr_state.var_ranges;
>
> 	rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..1b3a09c 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -11,10 +11,6 @@
> #include <asm/mtrr.h>
> #include "mtrr.h"
>
> -/* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> -
> -
> #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
>
> static const char *const mtrr_strings[MTRR_NUM_TYPES] =
> @@ -396,7 +392,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
> 	for (i = 0; i < max; i++) {
> 		mtrr_if->get(i, &base, &size, &type);
> 		if (size == 0)
> -			usage_table[i] = 0;
> +			mtrr_usage_table[i] = 0;
> 		else {
> 			if (size < (0x100000 >> PAGE_SHIFT)) {
> 				/* less than 1MB */
> @@ -410,7 +406,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
> 			len += seq_printf(seq,
> 				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
> 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
> -			     mtrr_attrib_to_str(type), usage_table[i]);
> +			     mtrr_attrib_to_str(type), mtrr_usage_table[i]);
> 		}
> 	}
> 	return 0;
> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
> index 55b0051..0e4be8b 100644
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -38,8 +38,8 @@
> #include <linux/cpu.h>
> #include <linux/mutex.h>
>
> +#include <asm/e820.h>
> #include <asm/mtrr.h>
> -
> #include <asm/uaccess.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> @@ -47,7 +47,7 @@
>
> u32 num_var_ranges = 0;
>
> -unsigned int *usage_table;
> +unsigned int mtrr_usage_table[MAX_VAR_RANGES];
> static DEFINE_MUTEX(mtrr_mutex);
>
> u64 size_or_mask, size_and_mask;
> @@ -121,13 +121,8 @@ static void __init init_table(void)
> 	int i, max;
>
> 	max = num_var_ranges;
> -	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
> -	    == NULL) {
> -		printk(KERN_ERR "mtrr: could not allocate\n");
> -		return;
> -	}
> 	for (i = 0; i < max; i++)
> -		usage_table[i] = 1;
> +		mtrr_usage_table[i] = 1;
> }
>
> struct set_mtrr_data {
> @@ -381,7 +376,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
> 			goto out;
> 		}
> 		if (increment)
> -			++usage_table[i];
> +			++mtrr_usage_table[i];
> 		error = i;
> 		goto out;
> 	}
> @@ -390,12 +385,13 @@ int mtrr_add_page(unsigned long base, unsigned long size,
> 	if (i >= 0) {
> 		set_mtrr(i, base, size, type);
> 		if (likely(replace < 0))
> -			usage_table[i] = 1;
> +			mtrr_usage_table[i] = 1;
> 		else {
> -			usage_table[i] = usage_table[replace] + !!increment;
> +			mtrr_usage_table[i] = mtrr_usage_table[replace] +
> +				!!increment;
> 			if (unlikely(replace != i)) {
> 				set_mtrr(replace, 0, 0, 0);
> -				usage_table[replace] = 0;
> +				mtrr_usage_table[replace] = 0;
> 			}
> 		}
> 	} else
> @@ -525,11 +521,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
> 		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
> 		goto out;
> 	}
> -	if (usage_table[reg] < 1) {
> +	if (mtrr_usage_table[reg] < 1) {
> 		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
> 		goto out;
> 	}
> -	if (--usage_table[reg] < 1)
> +	if (--mtrr_usage_table[reg] < 1)
> 		set_mtrr(reg, 0, 0, 0);
> 	error = reg;
>  out:
> @@ -589,16 +585,11 @@ struct mtrr_value {
> 	unsigned long	lsize;
> };
>
> -static struct mtrr_value * mtrr_state;
> +static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
>
> static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
> {
> 	int i;
> -	int size = num_var_ranges * sizeof(struct mtrr_value);
> -
> -	mtrr_state = kzalloc(size,GFP_ATOMIC);
> -	if (!mtrr_state)
> -		return -ENOMEM;
>
> 	for (i = 0; i < num_var_ranges; i++) {
> 		mtrr_if->get(i,
> @@ -620,7 +611,6 @@ static int mtrr_restore(struct sys_device * sysdev)
> 				 mtrr_state[i].lsize,
> 				 mtrr_state[i].ltype);
> 	}
> -	kfree(mtrr_state);
> 	return 0;
> }
>
> @@ -631,6 +621,59 @@ static struct sysdev_driver mtrr_sysdev_driver = {
> 	.resume		= mtrr_restore,
> };
>
> +static int disable_mtrr_trim;
> +
> +static int __init disable_mtrr_trim_setup(char *str)
> +{
> +	disable_mtrr_trim = 1;
> +	return 0;
> +}
> +early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
> +
> +#ifdef CONFIG_X86_64
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations.  This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> +	unsigned long i, base, size, highest_addr = 0, def, dummy;
> +	mtrr_type type;
> +
> +	/* Make sure we only trim uncachable memory on Intel machines */
> +	rdmsr(MTRRdefType_MSR, def, dummy);
> +	def &= 0xff;
> +	if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
> +		return;
> +
> +	/* Find highest cached pfn */
> +	for (i = 0; i < num_var_ranges; i++) {
> +		mtrr_if->get(i, &base, &size, &type);
> +		if (type != MTRR_TYPE_WRBACK)
> +			continue;
> +		base <<= PAGE_SHIFT;
> +		size <<= PAGE_SHIFT;
> +		if (highest_addr < base + size)
> +			highest_addr = base + size;
> +	}
> +
> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;
> +	}
> +}
> +#endif
>
> /**
>  * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..14fb88b 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
> #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>
> #define NUM_FIXED_RANGES 88
> +#define MAX_VAR_RANGES 256
> #define MTRRfix64K_00000_MSR 0x250
> #define MTRRfix16K_80000_MSR 0x258
> #define MTRRfix16K_A0000_MSR 0x259
> @@ -34,6 +35,8 @@
>    an 8 bit field: */
> typedef u8 mtrr_type;
>
> +extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
> +
> struct mtrr_ops {
> 	u32	vendor;
> 	u32	use_intel_if;
> diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
> index c3c6b91..c138eac 100644
> --- a/arch/x86_64/kernel/bugs.c
> +++ b/arch/x86_64/kernel/bugs.c
> @@ -14,7 +14,6 @@
> void __init check_bugs(void)
> {
> 	identify_cpu(&boot_cpu_data);
> -	mtrr_bp_init();
> #if !defined(CONFIG_SMP)
> 	printk("CPU: ");
> 	print_cpu_info(&boot_cpu_data);
> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
> index eb6524f..409b63c 100644
> --- a/arch/x86_64/kernel/setup.c
> +++ b/arch/x86_64/kernel/setup.c
> @@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
> 	 * we are rounding upwards:
> 	 */
> 	end_pfn = e820_end_of_ram();
> +	/* Trim memory not covered by WB MTRRs */
> +	mtrr_bp_init();
> +	mtrr_trim_uncached_memory();
> +
> 	num_physpages = end_pfn;
>
> 	check_efer();
> diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
> index b557c48..51ad788 100644
> --- a/include/asm-x86_64/mtrr.h
> +++ b/include/asm-x86_64/mtrr.h
> @@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
> 		     unsigned int type, char increment);
> extern int mtrr_del (int reg, unsigned long base, unsigned long size);
> extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
> +extern void mtrr_trim_uncached_memory(void);
> #  else
> static __inline__ int mtrr_add (unsigned long base, unsigned long size,
> 				unsigned int type, char increment)
> @@ -99,7 +100,9 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base,
> {
>     return -ENODEV;
> }
> -
> +static __inline__ void mtrr_trim_uncached_memory(void)
> +{
> +}
> #endif /* CONFIG_MTRR */
>
> #ifdef CONFIG_COMPAT
>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* [PATCH] trim memory not covered by WB MTRRs
@ 2007-06-25 21:34 Jesse Barnes
  2007-06-25 21:45 ` Justin Piszcz
                   ` (4 more replies)
  0 siblings, 5 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-25 21:34 UTC (permalink / raw)
  To: linux-kernel, akpm, Andi Kleen
  Cc: Justin Piszcz, Eric W. Biederman, Yinghai Lu

On some machines, buggy BIOSes don't properly setup WB MTRRs to
cover all available RAM, meaning the last few megs (or even gigs)
of memory will be marked uncached.  Since Linux tends to allocate
from high memory addresses first, this causes the machine to be
unusably slow as soon as the kernel starts really using memory
(i.e. right around init time).

This patch works around the problem by scanning the MTRRs at
boot and figuring out whether the current end_pfn value (setup
by early e820 code) goes beyond the highest WB MTRR range, and
if so, trimming it to match.  A fairly obnoxious KERN_WARNING
is printed too, letting the user know that not all of their
memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot
ordering would be slightly different, since the MTRR code on i386
depends on the boot_cpu_data structure being setup.

This patch fixes a bug in the last patch that caused the code to
run on non-Intel machines (AMD machines apparently don't need it
and it's untested on other non-Intel machines, so best keep it
off).

akpm -- this one should replace all the mtrr patches currently
in your tree.

Yinghai, maybe you can test this on one of your AMD machines to
make sure I got the CPU code right?

Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>

Thanks,
Jesse

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5d0283c..642db9b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_trim [X86-64, Intel only]
+			By default the kernel will trim any uncacheable
+			memory out of your available memory pool based on
+			MTRR settings.  This parameter disables that behavior,
+			possibly causing your machine to run very slowly.
+
 	dmascc=		[HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
 			support available.
 			Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 6d59378..bf70d2d 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -13,7 +13,7 @@
 #include "mtrr.h"
 
 struct mtrr_state {
-	struct mtrr_var_range *var_ranges;
+	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
 	unsigned char enabled;
 	unsigned char have_fixed;
@@ -84,12 +84,6 @@ void get_mtrr_state(void)
 	struct mtrr_var_range *vrs;
 	unsigned lo, dummy;
 
-	if (!mtrr_state.var_ranges) {
-		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
-						GFP_KERNEL);
-		if (!mtrr_state.var_ranges)
-			return;
-	} 
 	vrs = mtrr_state.var_ranges;
 
 	rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index c7d8f17..1b3a09c 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
 #include <asm/mtrr.h>
 #include "mtrr.h"
 
-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
 #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
 
 static const char *const mtrr_strings[MTRR_NUM_TYPES] =
@@ -396,7 +392,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 	for (i = 0; i < max; i++) {
 		mtrr_if->get(i, &base, &size, &type);
 		if (size == 0)
-			usage_table[i] = 0;
+			mtrr_usage_table[i] = 0;
 		else {
 			if (size < (0x100000 >> PAGE_SHIFT)) {
 				/* less than 1MB */
@@ -410,7 +406,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 			len += seq_printf(seq, 
 				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
-			     mtrr_attrib_to_str(type), usage_table[i]);
+			     mtrr_attrib_to_str(type), mtrr_usage_table[i]);
 		}
 	}
 	return 0;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 55b0051..0e4be8b 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 
+#include <asm/e820.h>
 #include <asm/mtrr.h>
-
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -47,7 +47,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int *usage_table;
+unsigned int mtrr_usage_table[MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -121,13 +121,8 @@ static void __init init_table(void)
 	int i, max;
 
 	max = num_var_ranges;
-	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
-	    == NULL) {
-		printk(KERN_ERR "mtrr: could not allocate\n");
-		return;
-	}
 	for (i = 0; i < max; i++)
-		usage_table[i] = 1;
+		mtrr_usage_table[i] = 1;
 }
 
 struct set_mtrr_data {
@@ -381,7 +376,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 			goto out;
 		}
 		if (increment)
-			++usage_table[i];
+			++mtrr_usage_table[i];
 		error = i;
 		goto out;
 	}
@@ -390,12 +385,13 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 	if (i >= 0) {
 		set_mtrr(i, base, size, type);
 		if (likely(replace < 0))
-			usage_table[i] = 1;
+			mtrr_usage_table[i] = 1;
 		else {
-			usage_table[i] = usage_table[replace] + !!increment;
+			mtrr_usage_table[i] = mtrr_usage_table[replace] +
+				!!increment;
 			if (unlikely(replace != i)) {
 				set_mtrr(replace, 0, 0, 0);
-				usage_table[replace] = 0;
+				mtrr_usage_table[replace] = 0;
 			}
 		}
 	} else
@@ -525,11 +521,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
 		goto out;
 	}
-	if (usage_table[reg] < 1) {
+	if (mtrr_usage_table[reg] < 1) {
 		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
-	if (--usage_table[reg] < 1)
+	if (--mtrr_usage_table[reg] < 1)
 		set_mtrr(reg, 0, 0, 0);
 	error = reg;
  out:
@@ -589,16 +585,11 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
 	int i;
-	int size = num_var_ranges * sizeof(struct mtrr_value);
-
-	mtrr_state = kzalloc(size,GFP_ATOMIC);
-	if (!mtrr_state)
-		return -ENOMEM;
 
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i,
@@ -620,7 +611,6 @@ static int mtrr_restore(struct sys_device * sysdev)
 				 mtrr_state[i].lsize,
 				 mtrr_state[i].ltype);
 	}
-	kfree(mtrr_state);
 	return 0;
 }
 
@@ -631,6 +621,59 @@ static struct sysdev_driver mtrr_sysdev_driver = {
 	.resume		= mtrr_restore,
 };
 
+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+	disable_mtrr_trim = 1;
+	return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+#ifdef CONFIG_X86_64
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations.  This routine checks to make sure the MTRRs having
+ * a write back type cover all of the memory the kernel is intending to use.
+ * If not, it'll trim any memory off the end by adjusting end_pfn, removing
+ * it from the kernel's allocation pools, warning the user with an obnoxious
+ * message.
+ */
+void __init mtrr_trim_uncached_memory(void)
+{
+	unsigned long i, base, size, highest_addr = 0, def, dummy;
+	mtrr_type type;
+
+	/* Make sure we only trim uncachable memory on Intel machines */
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
+		return;
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base <<= PAGE_SHIFT;
+		size <<= PAGE_SHIFT;
+		if (highest_addr < base + size)
+			highest_addr = base + size;
+	}
+
+	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
+		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
+		printk(KERN_WARNING "**** MTRRs don't cover all of "
+		       "memory, trimmed %ld pages\n", end_pfn -
+		       (highest_addr >> PAGE_SHIFT));
+		printk(KERN_WARNING "***************\n");
+		end_pfn = highest_addr >> PAGE_SHIFT;
+	}
+}
+#endif
 
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index 289dfe6..14fb88b 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -14,6 +14,7 @@
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
 #define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -34,6 +35,8 @@
    an 8 bit field: */
 typedef u8 mtrr_type;
 
+extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+
 struct mtrr_ops {
 	u32	vendor;
 	u32	use_intel_if;
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
index c3c6b91..c138eac 100644
--- a/arch/x86_64/kernel/bugs.c
+++ b/arch/x86_64/kernel/bugs.c
@@ -14,7 +14,6 @@
 void __init check_bugs(void)
 {
 	identify_cpu(&boot_cpu_data);
-	mtrr_bp_init();
 #if !defined(CONFIG_SMP)
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index eb6524f..409b63c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
 	 * we are rounding upwards:
 	 */
 	end_pfn = e820_end_of_ram();
+	/* Trim memory not covered by WB MTRRs */
+	mtrr_bp_init();
+	mtrr_trim_uncached_memory();
+
 	num_physpages = end_pfn;
 
 	check_efer();
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
index b557c48..51ad788 100644
--- a/include/asm-x86_64/mtrr.h
+++ b/include/asm-x86_64/mtrr.h
@@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
 		     unsigned int type, char increment);
 extern int mtrr_del (int reg, unsigned long base, unsigned long size);
 extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_trim_uncached_memory(void);
 #  else
 static __inline__ int mtrr_add (unsigned long base, unsigned long size,
 				unsigned int type, char increment)
@@ -99,7 +100,9 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base,
 {
     return -ENODEV;
 }
-
+static __inline__ void mtrr_trim_uncached_memory(void)
+{
+}
 #endif /* CONFIG_MTRR */
 
 #ifdef CONFIG_COMPAT

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-25 16:31             ` Pim Zandbergen
@ 2007-06-25 16:34               ` Justin Piszcz
  0 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-25 16:34 UTC (permalink / raw)
  To: Pim Zandbergen; +Cc: Jesse Barnes, linux-kernel

Impressive.

Jesse, can you touch base with Intel's BIOS department?  Also, what are 
the chances of that patch making it into 2.6.22-rc6/7 if it hasn't 
already?

On Mon, 25 Jun 2007, Pim Zandbergen wrote:

> Pim Zandbergen wrote
>>> 
>> I reported this to GigaByte, and lo and behold, they sent me a fixed BIOS 
>> within 48 hours.
>> Kudos to Taipeh!
>> 
>> They sent the BIOS image in a private message, so it might take a while 
>> before it's available
>> on their website.
> It is now, and it is described as "Fix Vista boot lag with 8GB memory issue" 
> ...
>
> Pim
>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-21 14:24           ` Pim Zandbergen
  2007-06-21 14:28             ` Justin Piszcz
@ 2007-06-25 16:31             ` Pim Zandbergen
  2007-06-25 16:34               ` Justin Piszcz
  1 sibling, 1 reply; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-25 16:31 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Justin Piszcz, linux-kernel

Pim Zandbergen wrote
>>  
> I reported this to GigaByte, and lo and behold, they sent me a fixed 
> BIOS within 48 hours.
> Kudos to Taipeh!
>
> They sent the BIOS image in a private message, so it might take a 
> while before it's available
> on their website.
It is now, and it is described as "Fix Vista boot lag with 8GB memory 
issue" ...

Pim


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-21 19:40 ` Yinghai Lu
@ 2007-06-21 19:56   ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-21 19:56 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Thursday, June 21, 2007 12:40:58 Yinghai Lu wrote:
> On 6/7/07, Jesse Barnes <jesse.barnes@intel.com> wrote:
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached.  Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > This patch works around the problem by scanning the MTRRs at
> > boot and figuring out whether the current end_pfn value (setup
> > by early e820 code) goes beyond the highest WB MTRR range, and
> > if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> > is printed too, letting the user know that not all of their
> > memory is available due to a likely BIOS bug.
> >
> > Something similar could be done on i386 if needed, but the boot
> > ordering would be slightly different, since the MTRR code on i386
> > depends on the boot_cpu_data structure being setup.
> >
> > This patch incorporates the feedback from Eric and Andi:
> >   - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> >   - move array declaration to header file as an extern
> >   - add command line disable option "disable_mtrr_trim"
> >   - don't run the trim code if the MTRR default type is cacheable
> >   - don't run the trim code on non-Intel machines
> >
> > Justin, feel free to test again if you have time and add your
> > "Tested-by" signoff.
> >
> > Andi, as for large pages, do you think this is ok as is, or should
> > I trim a larger granularity?  If so, what granularity?
> >
> > Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
> >
> > Thanks,
> > Jesse
>
> NAK.
>
> for AMD Rev F Opteron later CPU, BIOS will not set WB in MTRR for 4G
> above mem.
>
> This patch will get rid of those RAM.

Yeah, Eric already mentioned that.  I'll rework it to only run on Intel 
CPUs per Eric's last mail.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
                   ` (5 preceding siblings ...)
  2007-06-14 19:38 ` Pim Zandbergen
@ 2007-06-21 19:40 ` Yinghai Lu
  2007-06-21 19:56   ` Jesse Barnes
  6 siblings, 1 reply; 118+ messages in thread
From: Yinghai Lu @ 2007-06-21 19:40 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On 6/7/07, Jesse Barnes <jesse.barnes@intel.com> wrote:
> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
>   - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
>   - move array declaration to header file as an extern
>   - add command line disable option "disable_mtrr_trim"
>   - don't run the trim code if the MTRR default type is cacheable
>   - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity?  If so, what granularity?
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> Thanks,
> Jesse
>

NAK.

for AMD Rev F Opteron later CPU, BIOS will not set WB in MTRR for 4G above mem.

This patch will get rid of those RAM.

YH

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-21 14:24           ` Pim Zandbergen
@ 2007-06-21 14:28             ` Justin Piszcz
  2007-06-25 16:31             ` Pim Zandbergen
  1 sibling, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-21 14:28 UTC (permalink / raw)
  To: Pim Zandbergen; +Cc: Jesse Barnes, linux-kernel



On Thu, 21 Jun 2007, Pim Zandbergen wrote:

> Jesse Barnes wrote:
>>> What, are you going to report this to GigaByte?
>>> 
>> 
>> No, but you should if you haven't already.  I think GigaByte probably gets 
>> its BIOS from another BIOS vendor (maybe Intel), so when that vendor 
>> provides them with an update, they'll probably provide it on their website. 
>> And from what I understand, an Intel BIOS update is in the works to address 
>> this issue for Intel boards, so a GigaByte version may follow shortly.  I 
>> don't have an exact timeframe though...
>>
>> 
> I reported this to GigaByte, and lo and behold, they sent me a fixed BIOS 
> within 48 hours.
> Kudos to Taipeh!
>
> They sent the BIOS image in a private message, so it might take a while 
> before it's available
> on their website.
>
>

Wow, totally the opposite from Intel.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-15 16:20         ` Jesse Barnes
@ 2007-06-21 14:24           ` Pim Zandbergen
  2007-06-21 14:28             ` Justin Piszcz
  2007-06-25 16:31             ` Pim Zandbergen
  0 siblings, 2 replies; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-21 14:24 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Justin Piszcz, linux-kernel

Jesse Barnes wrote:
>> What, are you going to report this to GigaByte?
>>     
>
> No, but you should if you haven't already.  I think GigaByte probably 
> gets its BIOS from another BIOS vendor (maybe Intel), so when that 
> vendor provides them with an update, they'll probably provide it on 
> their website.  And from what I understand, an Intel BIOS update is in 
> the works to address this issue for Intel boards, so a GigaByte version 
> may follow shortly.  I don't have an exact timeframe though...
>
>   
I reported this to GigaByte, and lo and behold, they sent me a fixed 
BIOS within 48 hours.
Kudos to Taipeh!

They sent the BIOS image in a private message, so it might take a while 
before it's available
on their website.



^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-15 17:28       ` Jesse Barnes
@ 2007-06-20 13:55         ` Pim Zandbergen
  0 siblings, 0 replies; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-20 13:55 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: linux-kernel

Jesse Barnes wrote:
> On Friday, June 15, 2007 3:17:11 Pim Zandbergen wrote:
>   
>> Not that it matters much, as the current i810/intel xorg driver does
>> not yet support the GMA3100, so I'm using the vesa driver.
>>     
>
> I *think* the latest trees support that chip.  If you're feeling brave, 
> checkout the latest version of the xf86-video-intel driver from 
> freedesktop.org and give it a try 
As it happens, the Fedora guys just released a new Intel xorg driver RPM
that supports the G33. It works.
> (to get 3d you'll also need newer DRM and AGP bits).
>   
No 3D indeed. The newer DRM and AGP bits probably go into the kernel.

But hey, it's a server. I've got another machine with the exact same 
hardware
running Windows 2003. No video driver support there at all.

Thanks,
Pim


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-15 10:17     ` Pim Zandbergen
  2007-06-15 10:34       ` Justin Piszcz
@ 2007-06-15 17:28       ` Jesse Barnes
  2007-06-20 13:55         ` Pim Zandbergen
  1 sibling, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-15 17:28 UTC (permalink / raw)
  To: Pim Zandbergen; +Cc: Justin Piszcz, linux-kernel

On Friday, June 15, 2007 3:17:11 Pim Zandbergen wrote:
> Not that it matters much, as the current i810/intel xorg driver does
> not yet support the GMA3100, so I'm using the vesa driver.

I *think* the latest trees support that chip.  If you're feeling brave, 
checkout the latest version of the xf86-video-intel driver from 
freedesktop.org and give it a try (to get 3d you'll also need newer DRM 
and AGP bits).

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-15 10:21       ` Pim Zandbergen
@ 2007-06-15 16:20         ` Jesse Barnes
  2007-06-21 14:24           ` Pim Zandbergen
  0 siblings, 1 reply; 118+ messages in thread
From: Jesse Barnes @ 2007-06-15 16:20 UTC (permalink / raw)
  To: Pim Zandbergen; +Cc: Justin Piszcz, linux-kernel

On Friday, June 15, 2007 3:21:17 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > Thanks for testing, Pim.  Glad it works for you.
>
> The pleasure was all on my side.
>
> > Keep an eye out for BIOS upgrades, the next version might fix it.
>
> What, are you going to report this to GigaByte?

No, but you should if you haven't already.  I think GigaByte probably 
gets its BIOS from another BIOS vendor (maybe Intel), so when that 
vendor provides them with an update, they'll probably provide it on 
their website.  And from what I understand, an Intel BIOS update is in 
the works to address this issue for Intel boards, so a GigaByte version 
may follow shortly.  I don't have an exact timeframe though...

Jesse


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-15 10:17     ` Pim Zandbergen
@ 2007-06-15 10:34       ` Justin Piszcz
  2007-06-15 17:28       ` Jesse Barnes
  1 sibling, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-15 10:34 UTC (permalink / raw)
  To: Pim Zandbergen; +Cc: linux-kernel



On Fri, 15 Jun 2007, Pim Zandbergen wrote:

> Justin Piszcz wrote:
>> That's strange, I guess different chipsets 'chew' up different amounts of 
>> memory OR you have your DVT(?) (video-card memory/aperature) set to 256MB? 
>> I have mine set to 128MB, in top:
>> 
>> Mem:   8039576k total,  6187304k used,  1852272k free,      696k buffers
> Me:
> Mem:   7416672k total,   378988k used,  7037684k free,    13592k buffers
>
>> What type of memory are you using 
> 2x Kingston KVR667D2N5K2/4G
>> and what is your DVT set to?
> GigaByte's BIOS config options of the onboard graphics controller are
> very limited compared to those on your Intel motherboard.
>
> I can only choose the graphics buffer size, between
> "1MB+1~2MB for GTT" or "8MB+1~2MB for GTT".
> I chose the latter. The POST says 9MB are taken for video.
>
> Not that it matters much, as the current i810/intel xorg driver does
> not yet support the GMA3100, so I'm using the vesa driver.
>
> Thanks,
> Pim
>

I use the exact same memory model.  So it must be the difference in BIOS 
MTRR/E820 memory mappings.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-14 21:18     ` Jesse Barnes
  2007-06-14 21:21       ` Justin Piszcz
@ 2007-06-15 10:21       ` Pim Zandbergen
  2007-06-15 16:20         ` Jesse Barnes
  1 sibling, 1 reply; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-15 10:21 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Justin Piszcz, linux-kernel

Jesse Barnes wrote:

> Thanks for testing, Pim.  Glad it works for you.  
The pleasure was all on my side.

> Keep an eye out for BIOS upgrades, the next version might fix it.
>   

What, are you going to report this to GigaByte?

Thanks,
Pim


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-14 20:26   ` Justin Piszcz
  2007-06-14 21:18     ` Jesse Barnes
@ 2007-06-15 10:17     ` Pim Zandbergen
  2007-06-15 10:34       ` Justin Piszcz
  2007-06-15 17:28       ` Jesse Barnes
  1 sibling, 2 replies; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-15 10:17 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: linux-kernel

Justin Piszcz wrote:
> That's strange, I guess different chipsets 'chew' up different amounts 
> of memory OR you have your DVT(?) (video-card memory/aperature) set to 
> 256MB? I have mine set to 128MB, in top:
>
> Mem:   8039576k total,  6187304k used,  1852272k free,      696k buffers
Me:
Mem:   7416672k total,   378988k used,  7037684k free,    13592k buffers

> What type of memory are you using 
2x Kingston KVR667D2N5K2/4G
> and what is your DVT set to?
GigaByte's BIOS config options of the onboard graphics controller are
very limited compared to those on your Intel motherboard.

I can only choose the graphics buffer size, between
"1MB+1~2MB for GTT" or "8MB+1~2MB for GTT".
I chose the latter. The POST says 9MB are taken for video.

Not that it matters much, as the current i810/intel xorg driver does
not yet support the GMA3100, so I'm using the vesa driver.

Thanks,
Pim


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-14 21:21       ` Justin Piszcz
@ 2007-06-14 21:26         ` Jesse Barnes
  0 siblings, 0 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-14 21:26 UTC (permalink / raw)
  To: Justin Piszcz
  Cc: Pim Zandbergen, Andi Kleen, linux-kernel, Eric W. Biederman,
	Arjan van de Ven

On Thursday, June 14, 2007 2:21:16 Justin Piszcz wrote:
> To Intel,
>
> When will HECI be supported via the kernel?  When it becomes
> supported, would that alter the MTRR map at all?

I *think* HECI is related to our IT remote management stuff, but I don't 
work on it.  It *may* affect the MTRR mappings, but I think it's just a 
PCI device, so I don't think enabling it will change the MTRR layout.

Arjan, do you know who's doing our AMT stuff?

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-14 21:18     ` Jesse Barnes
@ 2007-06-14 21:21       ` Justin Piszcz
  2007-06-14 21:26         ` Jesse Barnes
  2007-06-15 10:21       ` Pim Zandbergen
  1 sibling, 1 reply; 118+ messages in thread
From: Justin Piszcz @ 2007-06-14 21:21 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Pim Zandbergen, Andi Kleen, linux-kernel, Eric W. Biederman



On Thu, 14 Jun 2007, Jesse Barnes wrote:

> On Thursday, June 14, 2007 1:26:07 Justin Piszcz wrote:
>> On Thu, 14 Jun 2007, Pim Zandbergen wrote:
>>> Thanks for this patch. I was having the exact same symptoms as
>>> Justin Piszcz, on a different, but similar motherboard:
>>>
>>> Motherboard: GigaByte GA-G33-DS3R
>>> BIOS rev: F2
>>> Chipset: Intel G33
>>> Memory: 8GB
>>> Distro: Fedora 7 x86_64
>>> Kernel: kernel-2.6.21-1.3194.fc7
>>>
>>> Building vanilla 2.6.22-rc4 with your patch solved the problem.
>>>
>>> I'm now seeing this in the syslog
>>>
>>> ***************
>>> **** WARNING: likely BIOS bug
>>> **** MTRRs don't cover all of memory, trimmed 196608 pages
>>> ***************
>>>
>>> leaving me 7416672 kB of usable memory.
>>>
>>> If there's any way I can help with more info or testing,
>>> then let me know.
>>>
>>> Thanks,
>>> Pim
>
> Thanks for testing, Pim.  Glad it works for you.  Keep an eye out for
> BIOS upgrades, the next version might fix it.
>
>> That's strange, I guess different chipsets 'chew' up different
>> amounts of memory OR you have your DVT(?) (video-card
>> memory/aperature) set to 256MB? I have mine set to 128MB, in top:
>>
>> Mem:   8039576k total,  6187304k used,  1852272k free,      696k
>> buffers
>>
>> What type of memory are you using and what is your DVT set to?
>
> Different BIOSes will map things differently, so I'd expect differences
> in the "trimmmed xxx pages" message across machines.  But yeah, BIOS
> config options can also affect things, in particular I've heard that
> the fan control options change MTRR setup significantly.
>
> Jesse
>

To Intel,

When will HECI be supported via the kernel?  When it becomes supported, 
would that alter the MTRR map at all?

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-14 20:26   ` Justin Piszcz
@ 2007-06-14 21:18     ` Jesse Barnes
  2007-06-14 21:21       ` Justin Piszcz
  2007-06-15 10:21       ` Pim Zandbergen
  2007-06-15 10:17     ` Pim Zandbergen
  1 sibling, 2 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-14 21:18 UTC (permalink / raw)
  To: Justin Piszcz; +Cc: Pim Zandbergen, Andi Kleen, linux-kernel, Eric W. Biederman

On Thursday, June 14, 2007 1:26:07 Justin Piszcz wrote:
> On Thu, 14 Jun 2007, Pim Zandbergen wrote:
> > Thanks for this patch. I was having the exact same symptoms as
> > Justin Piszcz, on a different, but similar motherboard:
> >
> > Motherboard: GigaByte GA-G33-DS3R
> > BIOS rev: F2
> > Chipset: Intel G33
> > Memory: 8GB
> > Distro: Fedora 7 x86_64
> > Kernel: kernel-2.6.21-1.3194.fc7
> >
> > Building vanilla 2.6.22-rc4 with your patch solved the problem.
> >
> > I'm now seeing this in the syslog
> >
> > ***************
> > **** WARNING: likely BIOS bug
> > **** MTRRs don't cover all of memory, trimmed 196608 pages
> > ***************
> >
> > leaving me 7416672 kB of usable memory.
> >
> > If there's any way I can help with more info or testing,
> > then let me know.
> >
> > Thanks,
> > Pim

Thanks for testing, Pim.  Glad it works for you.  Keep an eye out for 
BIOS upgrades, the next version might fix it.

> That's strange, I guess different chipsets 'chew' up different
> amounts of memory OR you have your DVT(?) (video-card
> memory/aperature) set to 256MB? I have mine set to 128MB, in top:
>
> Mem:   8039576k total,  6187304k used,  1852272k free,      696k
> buffers
>
> What type of memory are you using and what is your DVT set to?

Different BIOSes will map things differently, so I'd expect differences 
in the "trimmmed xxx pages" message across machines.  But yeah, BIOS 
config options can also affect things, in particular I've heard that 
the fan control options change MTRR setup significantly.

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-14 19:38 ` Pim Zandbergen
@ 2007-06-14 20:26   ` Justin Piszcz
  2007-06-14 21:18     ` Jesse Barnes
  2007-06-15 10:17     ` Pim Zandbergen
  0 siblings, 2 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-14 20:26 UTC (permalink / raw)
  To: Pim Zandbergen; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Eric W. Biederman



On Thu, 14 Jun 2007, Pim Zandbergen wrote:

> Thanks for this patch. I was having the exact same symptoms as Justin Piszcz, 
> on a different, but similar motherboard:
>
> Motherboard: GigaByte GA-G33-DS3R
> BIOS rev: F2
> Chipset: Intel G33
> Memory: 8GB
> Distro: Fedora 7 x86_64
> Kernel: kernel-2.6.21-1.3194.fc7
>
> Building vanilla 2.6.22-rc4 with your patch solved the problem.
>
> I'm now seeing this in the syslog
>
> ***************
> **** WARNING: likely BIOS bug
> **** MTRRs don't cover all of memory, trimmed 196608 pages
> ***************
>
> leaving me 7416672 kB of usable memory.
>
> If there's any way I can help with more info or testing,
> then let me know.
>
> Thanks,
> Pim
>

That's strange, I guess different chipsets 'chew' up different amounts of 
memory OR you have your DVT(?) (video-card memory/aperature) set to 256MB? 
I have mine set to 128MB, in top:

Mem:   8039576k total,  6187304k used,  1852272k free,      696k buffers

What type of memory are you using and what is your DVT set to?

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
                   ` (4 preceding siblings ...)
  2007-06-12 14:50 ` Pavel Machek
@ 2007-06-14 19:38 ` Pim Zandbergen
  2007-06-14 20:26   ` Justin Piszcz
  2007-06-21 19:40 ` Yinghai Lu
  6 siblings, 1 reply; 118+ messages in thread
From: Pim Zandbergen @ 2007-06-14 19:38 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

Thanks for this patch. I was having the exact same symptoms as Justin 
Piszcz, on a different, but similar motherboard:

Motherboard: GigaByte GA-G33-DS3R
BIOS rev: F2
Chipset: Intel G33
Memory: 8GB
Distro: Fedora 7 x86_64
Kernel: kernel-2.6.21-1.3194.fc7

Building vanilla 2.6.22-rc4 with your patch solved the problem.

I'm now seeing this in the syslog

  ***************
  **** WARNING: likely BIOS bug
  **** MTRRs don't cover all of memory, trimmed 196608 pages
  ***************

leaving me 7416672 kB of usable memory.

If there's any way I can help with more info or testing,
then let me know.

Thanks,
Pim


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-13  6:52 ` Bodo Eggert
@ 2007-06-13 16:19   ` Dave Jones
  0 siblings, 0 replies; 118+ messages in thread
From: Dave Jones @ 2007-06-13 16:19 UTC (permalink / raw)
  To: Bodo Eggert; +Cc: Jesse Barnes, linux-kernel, stin Piszcz, Eric W. Biederman

On Wed, Jun 13, 2007 at 08:52:23AM +0200, Bodo Eggert wrote:
 > Jesse Barnes <jesse.barnes@intel.com> wrote:
 > 
 > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
 > > cover all available RAM, meaning the last few megs (or even gigs)
 > > of memory will be marked uncached.  Since Linux tends to allocate
 > > from high memory addresses first, this causes the machine to be
 > > unusably slow as soon as the kernel starts really using memory
 > > (i.e. right around init time).
 > > 
 > > This patch works around the problem by scanning the MTRRs at
 > > boot and figuring out whether the current end_pfn value (setup
 > > by early e820 code) goes beyond the highest WB MTRR range, and
 > > if so, trimming it to match.  A fairly obnoxious KERN_WARNING
 > > is printed too, letting the user know that not all of their
 > > memory is available due to a likely BIOS bug.
 > 
 > Wouldn't it be better to correct the MTRR, if possible? As far as I read
 > here (LKML), the BIOS did not merge the entries

The size/alignment constraints of MTRRs (must be a power of 2)
means that the best-fit method of covering non power of 2 memory sizes
is the, well.. best fit.  There's nothing that can be merged.

	Dave

-- 
http://www.codemonkey.org.uk

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-13  0:25           ` Ray Lee
@ 2007-06-13  8:22             ` Pavel Machek
  0 siblings, 0 replies; 118+ messages in thread
From: Pavel Machek @ 2007-06-13  8:22 UTC (permalink / raw)
  To: Ray Lee
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

Hi!

> >> Panicking when it's not necessary is anti-social. If the kernel can
> >> continue, then it should, unless it's a correctness issue that may
> >> cause data corruption. Given that the kernel can even work around the
> >> problem now, throwing a panic is even less warranted.
> >
> >Printk("*********************** WARNING")
> >
> >is anti-social, too.
> 
> Pavel, this warning isn't even going to print on any of your systems.
> So it's completely different than the straw-man you're proposing (that
> I snipped).
> 
> Look, if you want to argue that the stars should go away, then sure,
> I'm not going to stop you. But panicking over a BIOS misconfiguration
> issue? One that can be corrected by the kernel? That's just plain
> stupid.

Well, either the warning is _really_ important. Then it is not really
warning, but a fatal problem, and we should panic for it (so that user
sees the message) and ask for a command line option (so we really
really know user wants to ignore that warning).

Or it is important but not _that_ important. We have
printk(KERN_EMERG) for that.

Or maybe it is not so important. We have printk(KERN_WARNING) for
that.

Pick one, but doing "KERN_WARNING" level with message
"************************* I'm extremely important warning,
************************** uhuh maybe there is something bad in your
bios but I'm not really sure" is just wrong.

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
       [not found] <8tyOc-8f0-17@gated-at.bofh.it>
@ 2007-06-13  6:52 ` Bodo Eggert
  2007-06-13 16:19   ` Dave Jones
  0 siblings, 1 reply; 118+ messages in thread
From: Bodo Eggert @ 2007-06-13  6:52 UTC (permalink / raw)
  To: Jesse Barnes, linux-kernel, stin Piszcz, Eric W. Biederman

Jesse Barnes <jesse.barnes@intel.com> wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
> 
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.

Wouldn't it be better to correct the MTRR, if possible? As far as I read
here (LKML), the BIOS did not merge the entries, and this waste caused the
last part of the memory not to be covered. Off cause you can't DTRT for all
buggy MTRR setups, but if you're lucky, optionally merging the MTRR and
adding the rest of the memory may sometimes do the trick ...
-- 
Funny quotes:
10. Nothing is fool proof to a talented fool.

Friß, Spammer: v9Ttukbw@NO.7eggert.dyndns.org

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 21:55         ` Pavel Machek
@ 2007-06-13  0:25           ` Ray Lee
  2007-06-13  8:22             ` Pavel Machek
  0 siblings, 1 reply; 118+ messages in thread
From: Ray Lee @ 2007-06-13  0:25 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On 6/12/07, Pavel Machek <pavel@ucw.cz> wrote:
> On Tue 2007-06-12 14:38:28, Ray Lee wrote:
> > Panicking when it's not necessary is anti-social. If the kernel can
> > continue, then it should, unless it's a correctness issue that may
> > cause data corruption. Given that the kernel can even work around the
> > problem now, throwing a panic is even less warranted.
>
> Printk("*********************** WARNING")
>
> is anti-social, too.

Pavel, this warning isn't even going to print on any of your systems.
So it's completely different than the straw-man you're proposing (that
I snipped).

Look, if you want to argue that the stars should go away, then sure,
I'm not going to stop you. But panicking over a BIOS misconfiguration
issue? One that can be corrected by the kernel? That's just plain
stupid.

Ray

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 21:38       ` Ray Lee
@ 2007-06-12 21:55         ` Pavel Machek
  2007-06-13  0:25           ` Ray Lee
  0 siblings, 1 reply; 118+ messages in thread
From: Pavel Machek @ 2007-06-12 21:55 UTC (permalink / raw)
  To: Ray Lee
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Tue 2007-06-12 14:38:28, Ray Lee wrote:
> On 6/12/07, Pavel Machek <pavel@ucw.cz> wrote:
> >> > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> >> > > cover all available RAM, meaning the last few megs (or even gigs)
> >> > > of memory will be marked uncached.  Since Linux tends to allocate
> >> > > from high memory addresses first, this causes the machine to be
> >> > > unusably slow as soon as the kernel starts really using memory
> >> > > (i.e. right around init time).
> >> > >
> >> > > + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> >> > > +         printk(KERN_WARNING "***************\n");
> >> > > +         printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> >> > > +         printk(KERN_WARNING "**** MTRRs don't cover all of "
> >> > > +                "memory, trimmed %ld pages\n", end_pfn -
> >> > > +                (highest_addr >> PAGE_SHIFT));
> >> > > +         printk(KERN_WARNING "***************\n");
> >> > > +         end_pfn = highest_addr >> PAGE_SHIFT;
> >> >
> >> > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> >> > you drop the stars and stop shouting?
> >>
> >> How missing about 1G of memory?  We already discussed this, and Andi and
> >> Venki felt that either a panic or a really obnoxious message was the
> >> way to go...
> >
> >Just use panic, then.
> >                                                                        Pavel,
> >        who still thinks anyone missing 1GB of ram will not miss
> >        friendly notice in dmesg, even if it goes without 20 stars.
> 
> Panicking when it's not necessary is anti-social. If the kernel can
> continue, then it should, unless it's a correctness issue that may
> cause data corruption. Given that the kernel can even work around the
> problem now, throwing a panic is even less warranted.

Printk("*********************** WARNING")

is anti-social, too.

Here's what my dmesg looks. Lots of uninteresting, unneccessary crap.

Instead of removing some of that crap, this patch starts "I'm more
important than you" wars, trying to get attetion with stars. How do
you like dump below?

Come on, we have printk loglevels. They are meant for getting
attetion. Shouting printk with ton of stars is not.

								Pavel

Linux version 2.6.22-rc4 (pavel@amd) (gcc version 4.1.2 20061115
(prerelease) (Debian 4.1.1-21)) #440 SMP Sat Jun 9 15:25:43 CEST 2007
...
DMI present.
ACPI: RSDP 000F6880, 0024 (r2 LENOVO)
ACPI: XSDT 7F6E6621, 0074 (r1 LENOVO TP-7B        1060  LTP        0)
ACPI: FACP 7F6E6700, 00F4 (r3 LENOVO TP-7B        1060 LNVO        1)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!ACPI WARNING (TBFADT-0434): OPTIONAL FIELD
!!!!!!!!!!!!!!!!!!"GPE1BLOCK" HAS ZERO ADDRESS OR LENGTH: 000000000000102C/0 [20070126]
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ACPI: DSDT 7F6E68E7, C463 (r1 LENOVO TP-7B        1060 MSFT  100000E)
ACPI: FACS 7F6F4000, 0040
ACPI: SSDT 7F6E68B4, 0033 (r1 LENOVO TP-7B        1060 MSFT  100000E)
ACPI: ECDT 7F6F2D4A, 0052 (r1 LENOVO TP-7B        1060 LNVO        1)
ACPI: TCPA 7F6F2D9C, 0032 (r2 LENOVO TP-7B        1060 LNVO        1)
ACPI: APIC 7F6F2DCE, 0068 (r1 LENOVO TP-7B        1060 LNVO        1)
ACPI: MCFG 7F6F2E36, 003E (r1 LENOVO TP-7B        1060 LNVO        1)
ACPI: HPET 7F6F2E74, 0038 (r1 LENOVO TP-7B        1060 LNVO        1)
ACPI: BOOT 7F6F2FD8, 0028 (r1 LENOVO TP-7B        1060  LTP        1)
ACPI: SSDT 7F6E5BDC, 0507 (r1 LENOVO TP-7B        1060 INTL 20050513)
ACPI: SSDT 7F6E5A04, 01D8 (r1 LENOVO TP-7B        1060 INTL 20050513)
ACPI: PM-Timer IO Port: 0x1008
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
Processor #0 6:14 APIC version 20
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] enabled)
Processor #1 6:14 APIC version 20
ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
////////////////////////////////////////////////////////////////////
/////////////////////////////ACPI: IRQ0 used by override.
///////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
/////////////////////////////ACPI: IRQ2 used by override.
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
/////////////////////////////ACPI: IRQ9 used by override.
////////////////////////////////////////////////////////////////////
Enabling APIC mode:  Flat.  Using 1 I/O APICs
Using ACPI (MADT) for SMP configuration information
Allocating PCI resources starting at 88000000 (gap: 80000000:70000000)
Built 1 zonelists.  Total pages: 517875
Kernel command line: root=/dev/sda4 resume=/dev/sda1
psmouse.psmouse_proto=imps psmouse_proto=imps psmouse.proto=imps
vga=791 init=/tmp/swsusp-init
************ Unknown boot option `psmouse.psmouse_proto=imps': ignoring
mapped APIC to ffffd000 (fee00000)
mapped IOAPIC to ffffc000 (fec00000)
Enabling fast FPU save and restore... done.
....
Total of 2 processors activated (7318.93 BogoMIPS).
ENABLING IO-APIC IRQs
..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
checking TSC synchronization [CPU#0 -> CPU#1]:
************
*
*
*
*           Measured 627605 cycles TSC warp between CPUs, turning off TSC clock.
*
*
*
*
*
**********************
##########################################################################
##### Marking TSC unstable due to: check_tsc_sync_source failed. ########
########################################################################
Brought up 2 CPUs
migration_cost=10000

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 21:30     ` Pavel Machek
  2007-06-12 21:31       ` Justin Piszcz
@ 2007-06-12 21:38       ` Ray Lee
  2007-06-12 21:55         ` Pavel Machek
  1 sibling, 1 reply; 118+ messages in thread
From: Ray Lee @ 2007-06-12 21:38 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Jesse Barnes, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On 6/12/07, Pavel Machek <pavel@ucw.cz> wrote:
> > > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > > > cover all available RAM, meaning the last few megs (or even gigs)
> > > > of memory will be marked uncached.  Since Linux tends to allocate
> > > > from high memory addresses first, this causes the machine to be
> > > > unusably slow as soon as the kernel starts really using memory
> > > > (i.e. right around init time).
> > > >
> > > > + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> > > > +         printk(KERN_WARNING "***************\n");
> > > > +         printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> > > > +         printk(KERN_WARNING "**** MTRRs don't cover all of "
> > > > +                "memory, trimmed %ld pages\n", end_pfn -
> > > > +                (highest_addr >> PAGE_SHIFT));
> > > > +         printk(KERN_WARNING "***************\n");
> > > > +         end_pfn = highest_addr >> PAGE_SHIFT;
> > >
> > > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> > > you drop the stars and stop shouting?
> >
> > How missing about 1G of memory?  We already discussed this, and Andi and
> > Venki felt that either a panic or a really obnoxious message was the
> > way to go...
>
> Just use panic, then.
>                                                                         Pavel,
>         who still thinks anyone missing 1GB of ram will not miss
>         friendly notice in dmesg, even if it goes without 20 stars.

Panicking when it's not necessary is anti-social. If the kernel can
continue, then it should, unless it's a correctness issue that may
cause data corruption. Given that the kernel can even work around the
problem now, throwing a panic is even less warranted.

Ray

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 21:30     ` Pavel Machek
@ 2007-06-12 21:31       ` Justin Piszcz
  2007-06-12 21:38       ` Ray Lee
  1 sibling, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-12 21:31 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Jesse Barnes, Andi Kleen, linux-kernel, Eric W. Biederman



On Tue, 12 Jun 2007, Pavel Machek wrote:

> Hi!
>
>>>> On some machines, buggy BIOSes don't properly setup WB MTRRs to
>>>> cover all available RAM, meaning the last few megs (or even gigs)
>>>> of memory will be marked uncached.  Since Linux tends to allocate
>>>> from high memory addresses first, this causes the machine to be
>>>> unusably slow as soon as the kernel starts really using memory
>>>> (i.e. right around init time).
>>>>
>>>> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
>>>> +		printk(KERN_WARNING "***************\n");
>>>> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
>>>> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
>>>> +		       "memory, trimmed %ld pages\n", end_pfn -
>>>> +		       (highest_addr >> PAGE_SHIFT));
>>>> +		printk(KERN_WARNING "***************\n");
>>>> +		end_pfn = highest_addr >> PAGE_SHIFT;
>>>
>>> Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
>>> you drop the stars and stop shouting?
>>
>> How missing about 1G of memory?  We already discussed this, and Andi and
>> Venki felt that either a panic or a really obnoxious message was the
>> way to go...
>
> Just use panic, then.
> 									Pavel,
> 	who still thinks anyone missing 1GB of ram will not miss
> 	friendly notice in dmesg, even if it goes without 20 stars.
>
> -- 
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
>

What is wrong with Jesse's patch?  I've been using it for quite a few days 
now, no issues.

Justin.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 15:29   ` Jesse Barnes
  2007-06-12 15:48     ` Andi Kleen
@ 2007-06-12 21:30     ` Pavel Machek
  2007-06-12 21:31       ` Justin Piszcz
  2007-06-12 21:38       ` Ray Lee
  1 sibling, 2 replies; 118+ messages in thread
From: Pavel Machek @ 2007-06-12 21:30 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

Hi!

> > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > > cover all available RAM, meaning the last few megs (or even gigs)
> > > of memory will be marked uncached.  Since Linux tends to allocate
> > > from high memory addresses first, this causes the machine to be
> > > unusably slow as soon as the kernel starts really using memory
> > > (i.e. right around init time).
> > >
> > > +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> > > +		printk(KERN_WARNING "***************\n");
> > > +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> > > +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> > > +		       "memory, trimmed %ld pages\n", end_pfn -
> > > +		       (highest_addr >> PAGE_SHIFT));
> > > +		printk(KERN_WARNING "***************\n");
> > > +		end_pfn = highest_addr >> PAGE_SHIFT;
> >
> > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> > you drop the stars and stop shouting?
> 
> How missing about 1G of memory?  We already discussed this, and Andi and 
> Venki felt that either a panic or a really obnoxious message was the 
> way to go...

Just use panic, then.
									Pavel,
	who still thinks anyone missing 1GB of ram will not miss
	friendly notice in dmesg, even if it goes without 20 stars.

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 15:29   ` Jesse Barnes
@ 2007-06-12 15:48     ` Andi Kleen
  2007-06-12 21:30     ` Pavel Machek
  1 sibling, 0 replies; 118+ messages in thread
From: Andi Kleen @ 2007-06-12 15:48 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Pavel Machek, Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

> > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> > you drop the stars and stop shouting?
> 
> How missing about 1G of memory?  We already discussed this, and Andi and 
> Venki felt that either a panic or a really obnoxious message was the 
> way to go...

Perhaps you could vary the number of stars based on the missing amount 
to satisfy Pavel @)

-Andi

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-12 14:50 ` Pavel Machek
@ 2007-06-12 15:29   ` Jesse Barnes
  2007-06-12 15:48     ` Andi Kleen
  2007-06-12 21:30     ` Pavel Machek
  0 siblings, 2 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-12 15:29 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

On Tuesday, June 12, 2007 7:50:08 Pavel Machek wrote:
> Hi!
>
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached.  Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> > +		printk(KERN_WARNING "***************\n");
> > +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> > +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> > +		       "memory, trimmed %ld pages\n", end_pfn -
> > +		       (highest_addr >> PAGE_SHIFT));
> > +		printk(KERN_WARNING "***************\n");
> > +		end_pfn = highest_addr >> PAGE_SHIFT;
>
> Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> you drop the stars and stop shouting?

How missing about 1G of memory?  We already discussed this, and Andi and 
Venki felt that either a panic or a really obnoxious message was the 
way to go...

Jesse

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
                   ` (3 preceding siblings ...)
  2007-06-08  8:20 ` Justin Piszcz
@ 2007-06-12 14:50 ` Pavel Machek
  2007-06-12 15:29   ` Jesse Barnes
  2007-06-14 19:38 ` Pim Zandbergen
  2007-06-21 19:40 ` Yinghai Lu
  6 siblings, 1 reply; 118+ messages in thread
From: Pavel Machek @ 2007-06-12 14:50 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Justin Piszcz, Eric W. Biederman

Hi!

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).

> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;

Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
you drop the stars and stop shouting?

							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
                   ` (2 preceding siblings ...)
  2007-06-07 23:00 ` Justin Piszcz
@ 2007-06-08  8:20 ` Justin Piszcz
  2007-06-12 14:50 ` Pavel Machek
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-08  8:20 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman



On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
>  - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
>  - move array declaration to header file as an extern
>  - add command line disable option "disable_mtrr_trim"
>  - don't run the trim code if the MTRR default type is cacheable
>  - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity?  If so, what granularity?
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> Thanks,
> Jesse
>

Looks good, it sustained many backups, bzip2, and even some encoding
processes, no issues.  Let me know if you need me to test any future
iterations of the patch, so far each has been fine, no problems to
report using the 965WH motherboard with 8GB of memory.

Signed-off-by:  Justin Piszcz <jpiszcz@lucidpixels.com>


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
       [not found]       ` <fa.x8ZCt4n0yXI1llhRq4wfjNfqK4w@ifi.uio.no>
@ 2007-06-08  1:57         ` Robert Hancock
  0 siblings, 0 replies; 118+ messages in thread
From: Robert Hancock @ 2007-06-08  1:57 UTC (permalink / raw)
  To: Justin Piszcz
  Cc: Jesse Barnes, Randy Dunlap, Andi Kleen, linux-kernel, Eric W. Biederman

Justin Piszcz wrote:
>> Note that your boot also mentions this:
>>
>> [  106.449661] mtrr: no more MTRRs available
>>
>> which indicates that things like X may not be able to map the
>> framebuffer with the 'write-combine' attribute, which will hurt
>> performance.  I've heard reports that turning of 'Intel QST fan
>> control' in your BIOS settings will prevent all your MTRRs from being
>> used (improperly, probably another BIOS bug) so that X will perform
>> well.  But if you don't use X on this machine, you don't have to worry
>> about it.  The other option would be to remap your MTRRs by hand to
>> free one up for X, you can do that by combining the last one or two
>> entries into a single MTRR using the API described in
>> Documentation/mtrr.txt before you start X.
>>
>> Jesse
>>
> 
> FYI--
> 
> [  106.449661] mtrr: no more MTRRs available
> 
> This has always occurred, even with mem=8832M setting.
> 
> Justin.

Yes, it's another consequence of the way your BIOS configured the MTRRs 
(wastefully, using more of the precious register entries than it needed 
to, in addition to not covering all of the RAM).

-- 
Robert Hancock      Saskatoon, SK, Canada
To email, remove "nospam" from hancockr@nospamshaw.ca
Home Page: http://www.roberthancock.com/


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
  2007-06-07 22:50 ` Justin Piszcz
  2007-06-07 22:53 ` Justin Piszcz
@ 2007-06-07 23:00 ` Justin Piszcz
  2007-06-08  8:20 ` Justin Piszcz
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-07 23:00 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2621 bytes --]



On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
>  - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
>  - move array declaration to header file as an extern
>  - add command line disable option "disable_mtrr_trim"
>  - don't run the trim code if the MTRR default type is cacheable
>  - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity?  If so, what granularity?
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> Thanks,
> Jesse
>

v1 of your patch:

top - 18:53:46 up 1 day, 1 min, 27 users,  load average: 2.82, 1.11,
0.90
Tasks: 356 total,   7 running, 348 sleeping,   1 stopped,   0 zombie
Cpu(s):  2.2%us,  0.4%sy,  0.0%ni, 97.0%id,  0.1%wa,  0.0%hi,  0.2%si,
0.0%st
Mem:   8039576k total,  7962376k used,    77200k free,      716k buffers
Swap: 16787768k total,      128k used, 16787640k free,  6713332k cached

v2 of your patch: (dmesg also attached)

top - 18:58:59 up 2 min,  4 users,  load average: 0.12, 0.13, 0.05
Tasks: 155 total,   1 running, 154 sleeping,   0 stopped,   0 zombie
Cpu(s):  2.0%us,  1.1%sy,  0.5%ni, 94.8%id,  1.5%wa,  0.0%hi,  0.0%si,
0.0%st
Mem:   8039576k total,   982192k used,  7057384k free,     1876k buffers
Swap: 16787768k total,        0k used, 16787768k free,   114492k cached

If the box has no issues over the next 8 hours with me pounding it with 
backups, bzip2s etc I'll consider it good, so far it boots fine etc, no 
issues, but I'll let it cook for a bit.  Will update tomorrow.

Thanks,

Justin.

[-- Attachment #2: Type: TEXT/plain, Size: 49579 bytes --]

[    0.000000] Linux version 2.6.22-rc4 (root@p34.internal.lan) (gcc version 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)) #4 SMP Thu Jun 7 18:54:18 EDT 2007
[    0.000000] Command line: auto BOOT_IMAGE=2.6.22-rc4-4 ro root=902 netconsole=4444@192.168.168.253/eth0,514@192.168.168.254/00:50:8D:ED:3C:E7
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000008f000 (usable)
[    0.000000]  BIOS-e820: 000000000008f000 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf58f000 (usable)
[    0.000000]  BIOS-e820: 00000000cf58f000 - 00000000cf59c000 (reserved)
[    0.000000]  BIOS-e820: 00000000cf59c000 - 00000000cf653000 (usable)
[    0.000000]  BIOS-e820: 00000000cf653000 - 00000000cf6a5000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf6a5000 - 00000000cf6a8000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf6a8000 - 00000000cf6ef000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf6ef000 - 00000000cf6f1000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf6f1000 - 00000000cf6f2000 (usable)
[    0.000000]  BIOS-e820: 00000000cf6f2000 - 00000000cf6ff000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf6ff000 - 00000000cf700000 (usable)
[    0.000000]  BIOS-e820: 00000000cf700000 - 00000000d0000000 (reserved)
[    0.000000]  BIOS-e820: 00000000fff00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000022c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 143) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849295) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 849308, 849491) 2 entries of 256 used
[    0.000000] Entering add_active_range(0, 849649, 849650) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 849663, 849664) 4 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 2277376) 5 entries of 256 used
[    0.000000] end_pfn_map = 2277376
[    0.000000] ***************
[    0.000000] **** WARNING: likely BIOS bug
[    0.000000] **** MTRRs don't cover all of memory, trimmed 16384 pages
[    0.000000] ***************
[    0.000000] DMI 2.4 present.
[    0.000000] ACPI: RSDP 000FE020, 0014 (r0 INTEL )
[    0.000000] ACPI: RSDT CF6FD038, 0050 (r1 INTEL  DG965WH       64C       1000013)
[    0.000000] ACPI: FACP CF6FC000, 0074 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: DSDT CF6F7000, 40E9 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: FACS CF6A8000, 0040
[    0.000000] ACPI: APIC CF6F6000, 0078 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: WDDT CF6F5000, 0040 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: MCFG CF6F4000, 003C (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: ASF! CF6F3000, 00A6 (r32 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: HPET CF6F2000, 0038 (r1 INTEL  DG965WH       64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6F0000, 01BC (r1 INTEL     CpuPm      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6EF000, 0175 (r1 INTEL   Cpu0Ist      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A7000, 0175 (r1 INTEL   Cpu1Ist      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A6000, 0175 (r1 INTEL   Cpu2Ist      64C MSFT  1000013)
[    0.000000] ACPI: SSDT CF6A5000, 0175 (r1 INTEL   Cpu3Ist      64C MSFT  1000013)
[    0.000000] Entering add_active_range(0, 0, 143) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849295) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 849308, 849491) 2 entries of 256 used
[    0.000000] Entering add_active_range(0, 849649, 849650) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 849663, 849664) 4 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 2260992) 5 entries of 256 used
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA             0 ->     4096
[    0.000000]   DMA32        4096 ->  1048576
[    0.000000]   Normal    1048576 ->  2260992
[    0.000000] early_node_map[6] active PFN ranges
[    0.000000]     0:        0 ->      143
[    0.000000]     0:      256 ->   849295
[    0.000000]     0:   849308 ->   849491
[    0.000000]     0:   849649 ->   849650
[    0.000000]     0:   849663 ->   849664
[    0.000000]     0:  1048576 ->  2260992
[    0.000000] On node 0 totalpages: 2061783
[    0.000000]   DMA zone: 56 pages used for memmap
[    0.000000]   DMA zone: 1395 pages reserved
[    0.000000]   DMA zone: 2532 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 14280 pages used for memmap
[    0.000000]   DMA32 zone: 831104 pages, LIFO batch:31
[    0.000000]   Normal zone: 16576 pages used for memmap
[    0.000000]   Normal zone: 1195840 pages, LIFO batch:31
[    0.000000] ACPI: PM-Timer IO Port: 0x408
[    0.000000] ACPI: Local APIC address 0xfee00000
[    0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
[    0.000000] Processor #0 (Bootup-CPU)
[    0.000000] ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
[    0.000000] Processor #2
[    0.000000] ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
[    0.000000] Processor #1
[    0.000000] ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
[    0.000000] Processor #3
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0x01] dfl dfl lint[0x1])
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0x02] dfl dfl lint[0x1])
[    0.000000] ACPI: IOAPIC (id[0x02] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 2, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.000000] ACPI: IRQ0 used by override.
[    0.000000] ACPI: IRQ2 used by override.
[    0.000000] ACPI: IRQ9 used by override.
[    0.000000] Setting APIC routing to flat
[    0.000000] ACPI: HPET id: 0x8086a201 base: 0xfed00000
[    0.000000] Using ACPI (MADT) for SMP configuration information
[    0.000000] Allocating PCI resources starting at d4000000 (gap: d0000000:2ff00000)
[    0.000000] PERCPU: Allocating 24744 bytes of per cpu data
[    0.000000] Built 1 zonelists.  Total pages: 2029476
[    0.000000] Kernel command line: auto BOOT_IMAGE=2.6.22-rc4-4 ro root=902 netconsole=4444@192.168.168.253/eth0,514@192.168.168.254/00:50:8D:ED:3C:E7
[    0.000000] netconsole: local port 4444
[    0.000000] netconsole: local IP 192.168.168.253
[    0.000000] netconsole: interface eth0
[    0.000000] netconsole: remote port 514
[    0.000000] netconsole: remote IP 192.168.168.254
[    0.000000] netconsole: remote ethernet address 00:50:8d:ed:3c:e7
[    0.000000] Initializing CPU#0
[    0.000000] PID hash table entries: 4096 (order: 12, 32768 bytes)
[   48.130591] time.c: Detected 2397.606 MHz processor.
[   48.131666] Console: colour VGA+ 80x25
[   48.139532] Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes)
[   48.145756] Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)
[   48.146855] Checking aperture...
[   48.146909] PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
[   48.186683] Placing software IO TLB between 0x951a000 - 0xd51a000
[   48.256438] Memory: 8039012k/9043968k available (3549k kernel code, 207808k reserved, 1317k data, 216k init)
[   48.315841] Calibrating delay using timer specific routine.. 4797.71 BogoMIPS (lpj=2398858)
[   48.315963] Mount-cache hash table entries: 256
[   48.316090] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.316152] CPU: L2 cache: 4096K
[   48.316192] using mwait in idle threads.
[   48.316234] CPU: Physical Processor ID: 0
[   48.316275] CPU: Processor Core ID: 0
[   48.316320] CPU0: Thermal monitoring enabled (TM2)
[   48.316367] Freeing SMP alternatives: 33k freed
[   48.316433] ACPI: Core revision 20070126
[   48.329824] Using local APIC timer interrupts.
[   48.371568] result 16650025
[   48.371607] Detected 16.650 MHz APIC timer.
[   48.371903] Booting processor 1/4 APIC 0x2
[   48.382292] Initializing CPU#1
[   48.442807] Calibrating delay using timer specific routine.. 4795.14 BogoMIPS (lpj=2397570)
[   48.442814] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.442816] CPU: L2 cache: 4096K
[   48.442818] CPU: Physical Processor ID: 0
[   48.442819] CPU: Processor Core ID: 2
[   48.442824] CPU1: Thermal monitoring enabled (TM2)
[   48.443247] Intel(R) Core(TM)2 Quad CPU           @ 2.40GHz stepping 07
[   48.443310] checking TSC synchronization [CPU#0 -> CPU#1]: passed.
[   48.463868] Booting processor 2/4 APIC 0x1
[   48.474628] Initializing CPU#2
[   48.534787] Calibrating delay using timer specific routine.. 4795.30 BogoMIPS (lpj=2397650)
[   48.534793] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.534794] CPU: L2 cache: 4096K
[   48.534796] CPU: Physical Processor ID: 0
[   48.534797] CPU: Processor Core ID: 1
[   48.534802] CPU2: Thermal monitoring enabled (TM2)
[   48.535246] Intel(R) Core(TM)2 Quad CPU           @ 2.40GHz stepping 07
[   48.535273] checking TSC synchronization [CPU#0 -> CPU#2]: passed.
[   48.555867] Booting processor 3/4 APIC 0x3
[   48.566258] Initializing CPU#3
[   48.626765] Calibrating delay using timer specific routine.. 4795.21 BogoMIPS (lpj=2397608)
[   48.626771] CPU: L1 I cache: 32K, L1 D cache: 32K
[   48.626773] CPU: L2 cache: 4096K
[   48.626775] CPU: Physical Processor ID: 0
[   48.626776] CPU: Processor Core ID: 3
[   48.626782] CPU3: Thermal monitoring enabled (TM2)
[   48.627204] Intel(R) Core(TM)2 Quad CPU           @ 2.40GHz stepping 07
[   48.627274] checking TSC synchronization [CPU#0 -> CPU#3]: passed.
[   48.647766] Brought up 4 CPUs
[   49.290497] migration_cost=26,3427
[   49.290867] NET: Registered protocol family 16
[   49.290968] ACPI: bus type pci registered
[   49.291017] PCI: BIOS Bug: MCFG area at f0000000 is not E820-reserved
[   49.291064] PCI: Not using MMCONFIG.
[   49.291105] PCI: Using configuration type 1
[   49.293817] ACPI: Interpreter enabled
[   49.293859] ACPI: Using IOAPIC for interrupt routing
[   49.297371] ACPI: PCI Root Bridge [PCI0] (0000:00)
[   49.297430] PCI: Probing PCI hardware (bus 00)
[   49.298083] PCI quirk: region 0400-047f claimed by ICH6 ACPI/GPIO/TCO
[   49.298132] PCI quirk: region 0500-053f claimed by ICH6 GPIO
[   49.299041] PCI: Transparent bridge - 0000:00:1e.0
[   49.299139] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
[   49.299405] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.P32_._PRT]
[   49.299607] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX0._PRT]
[   49.299689] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX1._PRT]
[   49.299768] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX2._PRT]
[   49.299848] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX3._PRT]
[   49.299927] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX4._PRT]
[   49.303177] ACPI: PCI Interrupt Link [LNKA] (IRQs 3 4 5 7 9 10 *11 12)
[   49.303465] ACPI: PCI Interrupt Link [LNKB] (IRQs 3 4 5 7 9 *10 11 12)
[   49.303754] ACPI: PCI Interrupt Link [LNKC] (IRQs 3 4 5 7 9 10 *11 12)
[   49.304060] ACPI: PCI Interrupt Link [LNKD] (IRQs 3 4 5 7 9 10 *11 12)
[   49.304364] ACPI: PCI Interrupt Link [LNKE] (IRQs 3 4 5 7 *9 10 11 12)
[   49.304653] ACPI: PCI Interrupt Link [LNKF] (IRQs 3 4 5 7 9 *10 11 12)
[   49.304938] ACPI: PCI Interrupt Link [LNKG] (IRQs 3 4 5 7 *9 10 11 12)
[   49.305263] ACPI: PCI Interrupt Link [LNKH] (IRQs 3 4 5 7 9 10 *11 12)
[   49.305553] Linux Plug and Play Support v0.97 (c) Adam Belay
[   49.305604] pnp: PnP ACPI init
[   49.305650] ACPI: bus type pnp registered
[   49.307458] pnp: PnP ACPI: found 12 devices
[   49.307500] ACPI: ACPI bus type pnp unregistered
[   49.307664] SCSI subsystem initialized
[   49.307760] libata version 2.21 loaded.
[   49.307814] usbcore: registered new interface driver usbfs
[   49.307881] usbcore: registered new interface driver hub
[   49.307950] usbcore: registered new device driver usb
[   49.308022] PCI: Using ACPI for IRQ routing
[   49.308064] PCI: If a device doesn't work, try "pci=routeirq".  If it helps, post a report
[   49.308236] PCI-GART: No AMD northbridge found.
[   49.308281] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
[   49.308415] hpet0: 3 64-bit timers, 14318180 Hz
[   49.309523] pnp: 00:01: iomem range 0xf0000000-0xf7ffffff has been reserved
[   49.309573] pnp: 00:01: iomem range 0xfed13000-0xfed13fff has been reserved
[   49.309622] Time: tsc clocksource has been installed.
[   49.309667] pnp: 00:01: iomem range 0xfed14000-0xfed17fff has been reserved
[   49.309715] pnp: 00:01: iomem range 0xfed18000-0xfed18fff has been reserved
[   49.309768] pnp: 00:06: ioport range 0x500-0x53f has been reserved
[   49.309814] pnp: 00:06: ioport range 0x400-0x47f has been reserved
[   49.310422] pnp: 00:06: ioport range 0x680-0x6ff has been reserved
[   49.310738] ACPI: PCI Interrupt 0000:06:03.0[A] -> GSI 19 (level, low) -> IRQ 19
[   49.360956] ohci1394: fw-host0: OHCI-1394 1.1 (PCI): IRQ=[19]  MMIO=[e0084000-e00847ff]  Max Packet=[2048]  IR/IT contexts=[4/8]
[   49.361085] PCI: Bridge: 0000:00:1c.0
[   49.361125]   IO window: disabled.
[   49.361168]   MEM window: e0700000-e07fffff
[   49.361211]   PREFETCH window: disabled.
[   49.361275] PCI: Bridge: 0000:00:1c.1
[   49.361317]   IO window: 5000-5fff
[   49.361359]   MEM window: e0400000-e04fffff
[   49.361413]   PREFETCH window: disabled.
[   49.361466] PCI: Bridge: 0000:00:1c.2
[   49.361508]   IO window: 4000-4fff
[   49.361552]   MEM window: e0300000-e03fffff
[   49.361597]   PREFETCH window: e0800000-e08fffff
[   49.361643] PCI: Bridge: 0000:00:1c.3
[   49.361684]   IO window: 3000-3fff
[   49.361726]   MEM window: e0200000-e02fffff
[   49.361770]   PREFETCH window: e0900000-e09fffff
[   49.361815] PCI: Bridge: 0000:00:1c.4
[   49.361857]   IO window: 2000-2fff
[   49.361899]   MEM window: e0100000-e01fffff
[   49.361943]   PREFETCH window: e0a00000-e0afffff
[   49.361988] PCI: Bridge: 0000:00:1e.0
[   49.362030]   IO window: 1000-1fff
[   49.362072]   MEM window: e0000000-e00fffff
[   49.362116]   PREFETCH window: e0b00000-e0bfffff
[   49.362172] ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 17 (level, low) -> IRQ 17
[   49.362256] PCI: Setting latency timer of device 0000:00:1c.0 to 64
[   49.362280] ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 16 (level, low) -> IRQ 16
[   49.362373] PCI: Setting latency timer of device 0000:00:1c.1 to 64
[   49.362386] ACPI: PCI Interrupt 0000:00:1c.2[C] -> GSI 18 (level, low) -> IRQ 18
[   49.362490] PCI: Setting latency timer of device 0000:00:1c.2 to 64
[   49.362502] ACPI: PCI Interrupt 0000:00:1c.3[D] -> GSI 19 (level, low) -> IRQ 19
[   49.362586] PCI: Setting latency timer of device 0000:00:1c.3 to 64
[   49.362600] ACPI: PCI Interrupt 0000:00:1c.4[A] -> GSI 17 (level, low) -> IRQ 17
[   49.362683] PCI: Setting latency timer of device 0000:00:1c.4 to 64
[   49.362691] PCI: Setting latency timer of device 0000:00:1e.0 to 64
[   49.362739] NET: Registered protocol family 2
[   49.371587] IP route cache hash table entries: 262144 (order: 9, 2097152 bytes)
[   49.372107] TCP established hash table entries: 131072 (order: 9, 3145728 bytes)
[   49.373400] TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
[   49.373900] TCP: Hash tables configured (established 131072 bind 65536)
[   49.373948] TCP reno registered
[   49.377996] Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
[   49.378132] SGI XFS with large block/inode numbers, no debug enabled
[   49.378483] io scheduler noop registered
[   49.378525] io scheduler anticipatory registered (default)
[   49.378575] Boot video device is 0000:00:02.0
[   49.379026] PCI: Setting latency timer of device 0000:00:1c.0 to 64
[   49.379058] assign_interrupt_mode Found MSI capability
[   49.379129] Allocate Port Service[0000:00:1c.0:pcie00]
[   49.379177] Allocate Port Service[0000:00:1c.0:pcie02]
[   49.379263] PCI: Setting latency timer of device 0000:00:1c.1 to 64
[   49.379294] assign_interrupt_mode Found MSI capability
[   49.379362] Allocate Port Service[0000:00:1c.1:pcie00]
[   49.379410] Allocate Port Service[0000:00:1c.1:pcie02]
[   49.379494] PCI: Setting latency timer of device 0000:00:1c.2 to 64
[   49.379525] assign_interrupt_mode Found MSI capability
[   49.379603] Allocate Port Service[0000:00:1c.2:pcie00]
[   49.379638] Allocate Port Service[0000:00:1c.2:pcie02]
[   49.379725] PCI: Setting latency timer of device 0000:00:1c.3 to 64
[   49.379763] assign_interrupt_mode Found MSI capability
[   49.379838] Allocate Port Service[0000:00:1c.3:pcie00]
[   49.379873] Allocate Port Service[0000:00:1c.3:pcie02]
[   49.379960] PCI: Setting latency timer of device 0000:00:1c.4 to 64
[   49.379996] assign_interrupt_mode Found MSI capability
[   49.380073] Allocate Port Service[0000:00:1c.4:pcie00]
[   49.380133] Allocate Port Service[0000:00:1c.4:pcie02]
[   49.382757] lp: driver loaded but no devices found
[   49.382849] Real Time Clock Driver v1.12ac
[   49.383002] hpet_resources: 0xfed00000 is busy
[   49.383012] Linux agpgart interface v0.102 (c) Dave Jones
[   49.383135] agpgart: Detected an Intel 965G Chipset.
[   49.384311] agpgart: Detected 7676K stolen memory.
[   49.396970] agpgart: AGP aperture is 256M @ 0xd0000000
[   49.397047] [drm] Initialized drm 1.1.0 20060810
[   49.397105] ACPI: PCI Interrupt 0000:00:02.0[A] -> GSI 16 (level, low) -> IRQ 16
[   49.397255] [drm] Initialized i915 1.6.0 20060119 on minor 0
[   49.397396] parport_pc 00:07: reported by Plug and Play ACPI
[   49.397520] parport0: PC-style at 0x378 (0x778), irq 7, using FIFO [PCSPP,TRISTATE,COMPAT,ECP]
[   49.479882] lp0: using parport0 (interrupt-driven).
[   49.480096] loop: module loaded
[   49.480139] Intel(R) PRO/1000 Network Driver - version 7.3.20-k2-NAPI
[   49.480188] Copyright (c) 1999-2006 Intel Corporation.
[   49.480275] ACPI: PCI Interrupt 0000:00:19.0[A] -> GSI 20 (level, low) -> IRQ 20
[   49.480369] PCI: Setting latency timer of device 0000:00:19.0 to 64
[   49.497933] e1000: 0000:00:19.0: e1000_probe: (PCI Express:2.5Gb/s:Width x1) 00:19:d1:6e:9d:43
[   49.573213] e1000: eth0: e1000_probe: Intel(R) PRO/1000 Network Connection
[   49.573300] ACPI: PCI Interrupt 0000:06:00.0[A] -> GSI 21 (level, low) -> IRQ 21
[   49.834558] e1000: 0000:06:00.0: e1000_probe: (PCI:33MHz:32-bit) 00:0e:0c:00:cd:b1
[   49.981554] e1000: eth1: e1000_probe: Intel(R) PRO/1000 Network Connection
[   49.981615] ACPI: PCI Interrupt 0000:06:01.0[A] -> GSI 22 (level, low) -> IRQ 22
[   50.228621] e1000: 0000:06:01.0: e1000_probe: (PCI:33MHz:32-bit) 00:07:e9:29:37:db
[   50.255346] e1000: eth2: e1000_probe: Intel(R) PRO/1000 Network Connection
[   50.255480] netconsole: device eth0 not up yet, forcing it
[   50.617119] ieee1394: Host added: ID:BUS[0-00:1023]  GUID[0090270001c5be9f]
[   52.334278] e1000: eth0: e1000_watchdog: NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX
[   52.358716] netconsole: network logging started
[   52.358929] ahci 0000:00:1f.2: version 2.2
[   52.358946] ACPI: PCI Interrupt 0000:00:1f.2[A] -> GSI 19 (level, low) -> IRQ 19
[   53.359807] ahci 0000:00:1f.2: AHCI 0001.0100 32 slots 6 ports 3 Gbps 0x3f impl SATA mode
[   53.359880] ahci 0000:00:1f.2: flags: 64bit ncq led clo pio slum part 
[   53.359936] PCI: Setting latency timer of device 0000:00:1f.2 to 64
[   53.360168] scsi0 : ahci
[   53.360286] scsi1 : ahci
[   53.360386] scsi2 : ahci
[   53.360487] scsi3 : ahci
[   53.360585] scsi4 : ahci
[   53.360700] scsi5 : ahci
[   53.360795] ata1: SATA max UDMA/133 cmd 0xffffc2000002e100 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.360874] ata2: SATA max UDMA/133 cmd 0xffffc2000002e180 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.360952] ata3: SATA max UDMA/133 cmd 0xffffc2000002e200 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.361030] ata4: SATA max UDMA/133 cmd 0xffffc2000002e280 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.361108] ata5: SATA max UDMA/133 cmd 0xffffc2000002e300 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.361205] ata6: SATA max UDMA/133 cmd 0xffffc2000002e380 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   53.820558] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   53.823973] ata1.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   53.824043] ata1.00: ATA-6: WDC WD740GD-00FLC0, 33.08F33, max UDMA/133
[   53.824098] ata1.00: 145226112 sectors, multi 0: LBA48 
[   53.828532] ata1.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   53.828605] ata1.00: configured for UDMA/133
[   54.286854] ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   54.290267] ata2.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   54.290337] ata2.00: ATA-6: WDC WD740GD-00FLC0, 33.08F33, max UDMA/133
[   54.290391] ata2.00: 145226112 sectors, multi 0: LBA48 
[   54.294838] ata2.00: ata_hpa_resize 1: sectors = 145226112, hpa_sectors = 145226112
[   54.294909] ata2.00: configured for UDMA/133
[   54.755252] ata3: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   54.757454] ata3.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   54.757534] ata3.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   54.757606] ata3.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   54.760453] ata3.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   54.760525] ata3.00: configured for UDMA/133
[   55.219479] ata4: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   55.221656] ata4.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.221727] ata4.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   55.221783] ata4.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   55.224617] ata4.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.224689] ata4.00: configured for UDMA/133
[   55.682687] ata5: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   55.684748] ata5.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.684819] ata5.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   55.684874] ata5.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   55.687539] ata5.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   55.687611] ata5.00: configured for UDMA/133
[   56.146925] ata6: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   56.148986] ata6.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.149058] ata6.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   56.149113] ata6.00: 293046768 sectors, multi 0: LBA48 NCQ (depth 31/32)
[   56.151795] ata6.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.151867] ata6.00: configured for UDMA/133
[   56.151994] scsi 0:0:0:0: Direct-Access     ATA      WDC WD740GD-00FL 33.0 PQ: 0 ANSI: 5
[   56.152166] sd 0:0:0:0: [sda] 145226112 512-byte hardware sectors (74356 MB)
[   56.152223] sd 0:0:0:0: [sda] Write Protect is off
[   56.152279] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[   56.152290] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.152401] sd 0:0:0:0: [sda] 145226112 512-byte hardware sectors (74356 MB)
[   56.152468] sd 0:0:0:0: [sda] Write Protect is off
[   56.152524] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[   56.152534] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.152616]  sda: sda1 sda2 sda3
[   56.163015] sd 0:0:0:0: [sda] Attached SCSI disk
[   56.163184] scsi 1:0:0:0: Direct-Access     ATA      WDC WD740GD-00FL 33.0 PQ: 0 ANSI: 5
[   56.163310] sd 1:0:0:0: [sdb] 145226112 512-byte hardware sectors (74356 MB)
[   56.163367] sd 1:0:0:0: [sdb] Write Protect is off
[   56.163414] sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
[   56.163425] sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.163531] sd 1:0:0:0: [sdb] 145226112 512-byte hardware sectors (74356 MB)
[   56.163596] sd 1:0:0:0: [sdb] Write Protect is off
[   56.163643] sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
[   56.163656] sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.163744]  sdb: sdb1 sdb2 sdb3
[   56.167415] sd 1:0:0:0: [sdb] Attached SCSI disk
[   56.167520] scsi 2:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.167651] sd 2:0:0:0: [sdc] 293046768 512-byte hardware sectors (150040 MB)
[   56.167713] sd 2:0:0:0: [sdc] Write Protect is off
[   56.167760] sd 2:0:0:0: [sdc] Mode Sense: 00 3a 00 00
[   56.167770] sd 2:0:0:0: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.167880] sd 2:0:0:0: [sdc] 293046768 512-byte hardware sectors (150040 MB)
[   56.167938] sd 2:0:0:0: [sdc] Write Protect is off
[   56.167985] sd 2:0:0:0: [sdc] Mode Sense: 00 3a 00 00
[   56.167995] sd 2:0:0:0: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.168068]  sdc: sdc1
[   56.173303] sd 2:0:0:0: [sdc] Attached SCSI disk
[   56.173408] scsi 3:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.173528] sd 3:0:0:0: [sdd] 293046768 512-byte hardware sectors (150040 MB)
[   56.173583] sd 3:0:0:0: [sdd] Write Protect is off
[   56.173630] sd 3:0:0:0: [sdd] Mode Sense: 00 3a 00 00
[   56.173640] sd 3:0:0:0: [sdd] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.173734] sd 3:0:0:0: [sdd] 293046768 512-byte hardware sectors (150040 MB)
[   56.173788] sd 3:0:0:0: [sdd] Write Protect is off
[   56.173855] sd 3:0:0:0: [sdd] Mode Sense: 00 3a 00 00
[   56.173865] sd 3:0:0:0: [sdd] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.173936]  sdd: sdd1
[   56.182236] sd 3:0:0:0: [sdd] Attached SCSI disk
[   56.182379] scsi 4:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.182498] sd 4:0:0:0: [sde] 293046768 512-byte hardware sectors (150040 MB)
[   56.182552] sd 4:0:0:0: [sde] Write Protect is off
[   56.182599] sd 4:0:0:0: [sde] Mode Sense: 00 3a 00 00
[   56.182609] sd 4:0:0:0: [sde] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.182704] sd 4:0:0:0: [sde] 293046768 512-byte hardware sectors (150040 MB)
[   56.182758] sd 4:0:0:0: [sde] Write Protect is off
[   56.182805] sd 4:0:0:0: [sde] Mode Sense: 00 3a 00 00
[   56.182815] sd 4:0:0:0: [sde] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.182886]  sde: sde1
[   56.191616] sd 4:0:0:0: [sde] Attached SCSI disk
[   56.191720] scsi 5:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   56.191835] sd 5:0:0:0: [sdf] 293046768 512-byte hardware sectors (150040 MB)
[   56.191891] sd 5:0:0:0: [sdf] Write Protect is off
[   56.191938] sd 5:0:0:0: [sdf] Mode Sense: 00 3a 00 00
[   56.191949] sd 5:0:0:0: [sdf] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.192045] sd 5:0:0:0: [sdf] 293046768 512-byte hardware sectors (150040 MB)
[   56.192099] sd 5:0:0:0: [sdf] Write Protect is off
[   56.192146] sd 5:0:0:0: [sdf] Mode Sense: 00 3a 00 00
[   56.192156] sd 5:0:0:0: [sdf] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   56.192248]  sdf: sdf1
[   56.200792] sd 5:0:0:0: [sdf] Attached SCSI disk
[   56.200925] sata_sil24 0000:03:00.0: version 0.9
[   56.200945] ACPI: PCI Interrupt 0000:03:00.0[A] -> GSI 18 (level, low) -> IRQ 18
[   56.201098] PCI: Setting latency timer of device 0000:03:00.0 to 64
[   56.201152] scsi6 : sata_sil24
[   56.201229] scsi7 : sata_sil24
[   56.201302] ata7: SATA max UDMA/100 cmd 0xffffc20000038000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   56.201375] ata8: SATA max UDMA/100 cmd 0xffffc2000003a000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   56.605136] ata7: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   56.607673] ata7.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.607744] ata7.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   56.607800] ata7.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   56.611033] ata7.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   56.611106] ata7.00: configured for UDMA/100
[   57.015233] ata8: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   57.017747] ata8.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.017818] ata8.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   57.017875] ata8.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   57.021110] ata8.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.021182] ata8.00: configured for UDMA/100
[   57.021288] scsi 6:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   57.021440] sd 6:0:0:0: [sdg] 293046768 512-byte hardware sectors (150040 MB)
[   57.021507] sd 6:0:0:0: [sdg] Write Protect is off
[   57.021554] sd 6:0:0:0: [sdg] Mode Sense: 00 3a 00 00
[   57.021565] sd 6:0:0:0: [sdg] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.021678] sd 6:0:0:0: [sdg] 293046768 512-byte hardware sectors (150040 MB)
[   57.021735] sd 6:0:0:0: [sdg] Write Protect is off
[   57.021803] sd 6:0:0:0: [sdg] Mode Sense: 00 3a 00 00
[   57.021813] sd 6:0:0:0: [sdg] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.021885]  sdg: sdg1
[   57.029146] sd 6:0:0:0: [sdg] Attached SCSI disk
[   57.029286] scsi 7:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   57.029404] sd 7:0:0:0: [sdh] 293046768 512-byte hardware sectors (150040 MB)
[   57.029458] sd 7:0:0:0: [sdh] Write Protect is off
[   57.029505] sd 7:0:0:0: [sdh] Mode Sense: 00 3a 00 00
[   57.029516] sd 7:0:0:0: [sdh] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.029612] sd 7:0:0:0: [sdh] 293046768 512-byte hardware sectors (150040 MB)
[   57.029666] sd 7:0:0:0: [sdh] Write Protect is off
[   57.029713] sd 7:0:0:0: [sdh] Mode Sense: 00 3a 00 00
[   57.029723] sd 7:0:0:0: [sdh] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.029816]  sdh: sdh1
[   57.037301] sd 7:0:0:0: [sdh] Attached SCSI disk
[   57.037420] ACPI: PCI Interrupt 0000:04:00.0[A] -> GSI 19 (level, low) -> IRQ 19
[   57.037567] PCI: Setting latency timer of device 0000:04:00.0 to 64
[   57.037611] scsi8 : sata_sil24
[   57.037687] scsi9 : sata_sil24
[   57.037757] ata9: SATA max UDMA/100 cmd 0xffffc20000068000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   57.037832] ata10: SATA max UDMA/100 cmd 0xffffc2000006a000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   57.444772] ata9: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   57.446869] ata9.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.446940] ata9.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   57.447004] ata9.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   57.449754] ata9.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.449826] ata9.00: configured for UDMA/100
[   57.854989] ata10: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   57.857111] ata10.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.857182] ata10.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   57.857256] ata10.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   57.860018] ata10.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   57.860092] ata10.00: configured for UDMA/100
[   57.860198] scsi 8:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   57.860352] sd 8:0:0:0: [sdi] 293046768 512-byte hardware sectors (150040 MB)
[   57.860406] sd 8:0:0:0: [sdi] Write Protect is off
[   57.860453] sd 8:0:0:0: [sdi] Mode Sense: 00 3a 00 00
[   57.860464] sd 8:0:0:0: [sdi] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.860574] sd 8:0:0:0: [sdi] 293046768 512-byte hardware sectors (150040 MB)
[   57.860632] sd 8:0:0:0: [sdi] Write Protect is off
[   57.860680] sd 8:0:0:0: [sdi] Mode Sense: 00 3a 00 00
[   57.860690] sd 8:0:0:0: [sdi] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.860761]  sdi: sdi1
[   57.866025] sd 8:0:0:0: [sdi] Attached SCSI disk
[   57.866160] scsi 9:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   57.866311] sd 9:0:0:0: [sdj] 293046768 512-byte hardware sectors (150040 MB)
[   57.866365] sd 9:0:0:0: [sdj] Write Protect is off
[   57.866412] sd 9:0:0:0: [sdj] Mode Sense: 00 3a 00 00
[   57.866422] sd 9:0:0:0: [sdj] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.866533] sd 9:0:0:0: [sdj] 293046768 512-byte hardware sectors (150040 MB)
[   57.866591] sd 9:0:0:0: [sdj] Write Protect is off
[   57.866649] sd 9:0:0:0: [sdj] Mode Sense: 00 3a 00 00
[   57.866660] sd 9:0:0:0: [sdj] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   57.866732]  sdj: sdj1
[   57.871781] sd 9:0:0:0: [sdj] Attached SCSI disk
[   57.871887] ACPI: PCI Interrupt 0000:05:00.0[A] -> GSI 16 (level, low) -> IRQ 16
[   57.872070] PCI: Setting latency timer of device 0000:05:00.0 to 64
[   57.872120] scsi10 : sata_sil24
[   57.872195] scsi11 : sata_sil24
[   57.872265] ata11: SATA max UDMA/100 cmd 0xffffc20000070000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   57.872339] ata12: SATA max UDMA/100 cmd 0xffffc20000072000 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 0
[   58.276407] ata11: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   58.278892] ata11.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.278964] ata11.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   58.279019] ata11.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   58.282273] ata11.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.282345] ata11.00: configured for UDMA/100
[   58.686525] ata12: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[   58.689025] ata12.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.689096] ata12.00: ATA-7: WDC WD1500ADFD-00NLR1, 20.07P20, max UDMA/133
[   58.689180] ata12.00: 293046768 sectors, multi 16: LBA48 NCQ (depth 31/32)
[   58.692407] ata12.00: ata_hpa_resize 1: sectors = 293046768, hpa_sectors = 293046768
[   58.692481] ata12.00: configured for UDMA/100
[   58.692587] scsi 10:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   58.692737] sd 10:0:0:0: [sdk] 293046768 512-byte hardware sectors (150040 MB)
[   58.692819] sd 10:0:0:0: [sdk] Write Protect is off
[   58.692866] sd 10:0:0:0: [sdk] Mode Sense: 00 3a 00 00
[   58.692876] sd 10:0:0:0: [sdk] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.692988] sd 10:0:0:0: [sdk] 293046768 512-byte hardware sectors (150040 MB)
[   58.693061] sd 10:0:0:0: [sdk] Write Protect is off
[   58.693126] sd 10:0:0:0: [sdk] Mode Sense: 00 3a 00 00
[   58.693136] sd 10:0:0:0: [sdk] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.693207]  sdk: sdk1
[   58.702890] sd 10:0:0:0: [sdk] Attached SCSI disk
[   58.703033] scsi 11:0:0:0: Direct-Access     ATA      WDC WD1500ADFD-0 20.0 PQ: 0 ANSI: 5
[   58.703169] sd 11:0:0:0: [sdl] 293046768 512-byte hardware sectors (150040 MB)
[   58.703238] sd 11:0:0:0: [sdl] Write Protect is off
[   58.703286] sd 11:0:0:0: [sdl] Mode Sense: 00 3a 00 00
[   58.703296] sd 11:0:0:0: [sdl] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.703408] sd 11:0:0:0: [sdl] 293046768 512-byte hardware sectors (150040 MB)
[   58.703482] sd 11:0:0:0: [sdl] Write Protect is off
[   58.703529] sd 11:0:0:0: [sdl] Mode Sense: 00 3a 00 00
[   58.703539] sd 11:0:0:0: [sdl] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   58.703610]  sdl: sdl1
[   58.712922] sd 11:0:0:0: [sdl] Attached SCSI disk
[   58.713060] ACPI: PCI Interrupt 0000:02:00.0[A] -> GSI 17 (level, low) -> IRQ 17
[   58.713177] PCI: Setting latency timer of device 0000:02:00.0 to 64
[   58.713217] scsi12 : pata_marvell
[   58.713294] scsi13 : pata_marvell
[   58.713386] ata13: PATA max UDMA/100 cmd 0x0000000000015018 ctl 0x0000000000015026 bmdma 0x0000000000015000 irq 0
[   58.713458] ata14: DUMMY
[   58.713527] BAR5:00:00 01:7F 02:22 03:CA 04:00 05:00 06:00 07:00 08:00 09:00 0A:00 0B:00 0C:01 0D:00 0E:00 0F:00 
[   59.019568] ata13.00: ATAPI, max UDMA/33
[   59.178746] ata13.00: configured for UDMA/33
[   59.179810] scsi 12:0:0:0: CD-ROM            _NEC     DVD_RW ND-3520A  1.04 PQ: 0 ANSI: 5
[   59.184063] sr0: scsi3-mmc drive: 48x/48x writer cd/rw xa/form2 cdda tray
[   59.184112] Uniform CD-ROM driver Revision: 3.20
[   59.184193] sr 12:0:0:0: Attached scsi CD-ROM sr0
[   59.184253] ACPI: PCI Interrupt 0000:00:1a.7[C] -> GSI 18 (level, low) -> IRQ 18
[   59.184358] PCI: Setting latency timer of device 0000:00:1a.7 to 64
[   59.184361] ehci_hcd 0000:00:1a.7: EHCI Host Controller
[   59.184450] ehci_hcd 0000:00:1a.7: new USB bus registered, assigned bus number 1
[   59.184541] ehci_hcd 0000:00:1a.7: debug port 1
[   59.184588] PCI: cache line size of 32 is not supported by device 0000:00:1a.7
[   59.184591] ehci_hcd 0000:00:1a.7: irq 18, io mem 0xe0625c00
[   59.188525] ehci_hcd 0000:00:1a.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004
[   59.188683] usb usb1: configuration #1 chosen from 1 choice
[   59.188755] hub 1-0:1.0: USB hub found
[   59.188803] hub 1-0:1.0: 4 ports detected
[   59.289605] ACPI: PCI Interrupt 0000:00:1d.7[A] -> GSI 23 (level, low) -> IRQ 23
[   59.289707] PCI: Setting latency timer of device 0000:00:1d.7 to 64
[   59.289711] ehci_hcd 0000:00:1d.7: EHCI Host Controller
[   59.289813] ehci_hcd 0000:00:1d.7: new USB bus registered, assigned bus number 2
[   59.289900] ehci_hcd 0000:00:1d.7: debug port 1
[   59.289970] PCI: cache line size of 32 is not supported by device 0000:00:1d.7
[   59.289976] ehci_hcd 0000:00:1d.7: irq 23, io mem 0xe0625800
[   59.293909] ehci_hcd 0000:00:1d.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004
[   59.294043] usb usb2: configuration #1 chosen from 1 choice
[   59.294136] hub 2-0:1.0: USB hub found
[   59.294180] hub 2-0:1.0: 6 ports detected
[   59.395292] USB Universal Host Controller Interface driver v3.0
[   59.395383] ACPI: PCI Interrupt 0000:00:1a.0[A] -> GSI 16 (level, low) -> IRQ 16
[   59.395508] PCI: Setting latency timer of device 0000:00:1a.0 to 64
[   59.395510] uhci_hcd 0000:00:1a.0: UHCI Host Controller
[   59.395578] uhci_hcd 0000:00:1a.0: new USB bus registered, assigned bus number 3
[   59.395662] uhci_hcd 0000:00:1a.0: irq 16, io base 0x000060c0
[   59.395796] usb usb3: configuration #1 chosen from 1 choice
[   59.395868] hub 3-0:1.0: USB hub found
[   59.396512] hub 3-0:1.0: 2 ports detected
[   59.498341] ACPI: PCI Interrupt 0000:00:1a.1[B] -> GSI 21 (level, low) -> IRQ 21
[   59.498438] PCI: Setting latency timer of device 0000:00:1a.1 to 64
[   59.498442] uhci_hcd 0000:00:1a.1: UHCI Host Controller
[   59.498542] uhci_hcd 0000:00:1a.1: new USB bus registered, assigned bus number 4
[   59.498628] uhci_hcd 0000:00:1a.1: irq 21, io base 0x000060a0
[   59.498779] usb usb4: configuration #1 chosen from 1 choice
[   59.498848] hub 4-0:1.0: USB hub found
[   59.498896] hub 4-0:1.0: 2 ports detected
[   59.599341] ACPI: PCI Interrupt 0000:00:1d.0[A] -> GSI 23 (level, low) -> IRQ 23
[   59.599439] PCI: Setting latency timer of device 0000:00:1d.0 to 64
[   59.599442] uhci_hcd 0000:00:1d.0: UHCI Host Controller
[   59.599547] uhci_hcd 0000:00:1d.0: new USB bus registered, assigned bus number 5
[   59.599631] uhci_hcd 0000:00:1d.0: irq 23, io base 0x00006080
[   59.599783] usb usb5: configuration #1 chosen from 1 choice
[   59.599855] hub 5-0:1.0: USB hub found
[   59.599902] hub 5-0:1.0: 2 ports detected
[   59.700337] ACPI: PCI Interrupt 0000:00:1d.1[B] -> GSI 19 (level, low) -> IRQ 19
[   59.700443] PCI: Setting latency timer of device 0000:00:1d.1 to 64
[   59.700446] uhci_hcd 0000:00:1d.1: UHCI Host Controller
[   59.700538] uhci_hcd 0000:00:1d.1: new USB bus registered, assigned bus number 6
[   59.700619] uhci_hcd 0000:00:1d.1: irq 19, io base 0x00006060
[   59.700772] usb usb6: configuration #1 chosen from 1 choice
[   59.700842] hub 6-0:1.0: USB hub found
[   59.700890] hub 6-0:1.0: 2 ports detected
[   59.801355] ACPI: PCI Interrupt 0000:00:1d.2[C] -> GSI 18 (level, low) -> IRQ 18
[   59.801458] PCI: Setting latency timer of device 0000:00:1d.2 to 64
[   59.801461] uhci_hcd 0000:00:1d.2: UHCI Host Controller
[   59.801549] uhci_hcd 0000:00:1d.2: new USB bus registered, assigned bus number 7
[   59.801631] uhci_hcd 0000:00:1d.2: irq 18, io base 0x00006040
[   59.801782] usb usb7: configuration #1 chosen from 1 choice
[   59.801851] hub 7-0:1.0: USB hub found
[   59.801898] hub 7-0:1.0: 2 ports detected
[   59.902371] Initializing USB Mass Storage driver...
[   59.906365] usb 5-1: new low speed USB device using uhci_hcd and address 2
[   60.066711] usb 5-1: configuration #1 chosen from 1 choice
[   60.069764] usbcore: registered new interface driver usb-storage
[   60.069814] USB Mass Storage support registered.
[   60.069933] PNP: PS/2 Controller [PNP0303:PS2K,PNP0f03:PS2M] at 0x60,0x64 irq 1,12
[   60.072839] serio: i8042 KBD port at 0x60,0x64 irq 1
[   60.072886] serio: i8042 AUX port at 0x60,0x64 irq 12
[   60.072959] mice: PS/2 mouse device common for all mice
[   60.073175] input: PC Speaker as /class/input/input0
[   60.073224] i2c /dev entries driver
[   60.073338] coretemp coretemp.0: Using undocumented features, absolute temperature might be wrong!
[   60.073453] coretemp coretemp.1: Using undocumented features, absolute temperature might be wrong!
[   60.073562] coretemp coretemp.2: Using undocumented features, absolute temperature might be wrong!
[   60.073675] coretemp coretemp.3: Using undocumented features, absolute temperature might be wrong!
[   60.073779] md: raid1 personality registered for level 1
[   60.090134] raid6: int64x1   2023 MB/s
[   60.107135] raid6: int64x2   2781 MB/s
[   60.124120] raid6: int64x4   3109 MB/s
[   60.141143] raid6: int64x8   1917 MB/s
[   60.158123] raid6: sse2x1    3843 MB/s
[   60.175118] raid6: sse2x2    4238 MB/s
[   60.192112] raid6: sse2x4    6796 MB/s
[   60.192154] raid6: using algorithm sse2x4 (6796 MB/s)
[   60.192202] md: raid6 personality registered for level 6
[   60.192247] md: raid5 personality registered for level 5
[   60.192294] md: raid4 personality registered for level 4
[   60.192340] raid5: automatically using best checksumming function: generic_sse
[   60.197110]    generic_sse:  8584.000 MB/sec
[   60.197153] raid5: using function: generic_sse (8584.000 MB/sec)
[   60.197228] usbcore: registered new interface driver hiddev
[   60.643555] hiddev96: USB HID v1.00 Device [        UPS] on usb-0000:00:1d.0-1
[   60.643672] usbcore: registered new interface driver usbhid
[   60.643721] drivers/hid/usbhid/hid-core.c: v2.6:USB HID core driver
[   60.643781] Advanced Linux Sound Architecture Driver Version 1.0.14 (Thu May 31 09:03:25 2007 UTC).
[   60.643996] ACPI: PCI Interrupt 0000:00:1b.0[A] -> GSI 22 (level, low) -> IRQ 22
[   60.644106] PCI: Setting latency timer of device 0000:00:1b.0 to 64
[   60.711615] input: ImPS/2 Generic Wheel Mouse as /class/input/input1
[   60.735452] input: AT Translated Set 2 keyboard as /class/input/input2
[   60.779950] ALSA device list:
[   60.779996]   #0: HDA Intel at 0xe0620000 irq 22
[   60.780049] u32 classifier
[   60.780102]     Actions configured 
[   60.780167] nf_conntrack version 0.5.0 (8192 buckets, 65536 max)
[   60.780378] ip_tables: (C) 2000-2006 Netfilter Core Team
[   60.780475] TCP cubic registered
[   60.780533] NET: Registered protocol family 1
[   60.780590] NET: Registered protocol family 17
[   60.780895] md: Autodetecting RAID arrays.
[   60.926037] md: autorun ...
[   60.926077] md: considering sdl1 ...
[   60.926145] md:  adding sdl1 ...
[   60.926190] md:  adding sdk1 ...
[   60.926235] md:  adding sdj1 ...
[   60.926290] md:  adding sdi1 ...
[   60.926345] md:  adding sdh1 ...
[   60.926389] md:  adding sdg1 ...
[   60.926434] md:  adding sdf1 ...
[   60.926479] md:  adding sde1 ...
[   60.926524] md:  adding sdd1 ...
[   60.926566] md:  adding sdc1 ...
[   60.926610] md: sdb3 has different UUID to sdl1
[   60.926658] md: sdb2 has different UUID to sdl1
[   60.926705] md: sdb1 has different UUID to sdl1
[   60.926751] md: sda3 has different UUID to sdl1
[   60.926798] md: sda2 has different UUID to sdl1
[   60.926846] md: sda1 has different UUID to sdl1
[   60.926926] md: created md3
[   60.926967] md: bind<sdc1>
[   60.927013] md: bind<sdd1>
[   60.927058] md: bind<sde1>
[   60.927104] md: bind<sdf1>
[   60.927160] md: bind<sdg1>
[   60.927216] md: bind<sdh1>
[   60.927262] md: bind<sdi1>
[   60.927308] md: bind<sdj1>
[   60.927374] md: bind<sdk1>
[   60.927420] md: bind<sdl1>
[   60.927467] md: running: <sdl1><sdk1><sdj1><sdi1><sdh1><sdg1><sdf1><sde1><sdd1><sdc1>
[   60.927774] raid5: device sdl1 operational as raid disk 2
[   60.927824] raid5: device sdk1 operational as raid disk 3
[   60.927872] raid5: device sdj1 operational as raid disk 0
[   60.927919] raid5: device sdi1 operational as raid disk 1
[   60.927968] raid5: device sdh1 operational as raid disk 6
[   60.928016] raid5: device sdg1 operational as raid disk 5
[   60.928063] raid5: device sdf1 operational as raid disk 4
[   60.928111] raid5: device sde1 operational as raid disk 7
[   60.928159] raid5: device sdd1 operational as raid disk 8
[   60.928227] raid5: device sdc1 operational as raid disk 9
[   60.928780] raid5: allocated 10562kB for md3
[   60.928825] raid5: raid level 5 set md3 active with 10 out of 10 devices, algorithm 2
[   60.928893] RAID5 conf printout:
[   60.928937]  --- rd:10 wd:10
[   60.928979]  disk 0, o:1, dev:sdj1
[   60.929022]  disk 1, o:1, dev:sdi1
[   60.929065]  disk 2, o:1, dev:sdl1
[   60.929106]  disk 3, o:1, dev:sdk1
[   60.929150]  disk 4, o:1, dev:sdf1
[   60.929193]  disk 5, o:1, dev:sdg1
[   60.929236]  disk 6, o:1, dev:sdh1
[   60.929300]  disk 7, o:1, dev:sde1
[   60.929343]  disk 8, o:1, dev:sdd1
[   60.929386]  disk 9, o:1, dev:sdc1
[   60.929487] md: considering sdb3 ...
[   60.929531] md:  adding sdb3 ...
[   60.929576] md: sdb2 has different UUID to sdb3
[   60.929620] md: sdb1 has different UUID to sdb3
[   60.929668] md:  adding sda3 ...
[   60.929709] md: sda2 has different UUID to sdb3
[   60.929756] md: sda1 has different UUID to sdb3
[   60.929832] md: created md2
[   60.929872] md: bind<sda3>
[   60.929918] md: bind<sdb3>
[   60.929964] md: running: <sdb3><sda3>
[   60.930104] raid1: raid set md2 active with 2 out of 2 mirrors
[   60.930172] md: considering sdb2 ...
[   60.930218] md:  adding sdb2 ...
[   60.930262] md: sdb1 has different UUID to sdb2
[   60.930331] md:  adding sda2 ...
[   60.930375] md: sda1 has different UUID to sdb2
[   60.930451] md: created md1
[   60.930515] md: bind<sda2>
[   60.930557] md: bind<sdb2>
[   60.930604] md: running: <sdb2><sda2>
[   60.930742] raid1: raid set md1 active with 2 out of 2 mirrors
[   60.930813] md: considering sdb1 ...
[   60.930860] md:  adding sdb1 ...
[   60.930904] md:  adding sda1 ...
[   60.930949] md: created md0
[   60.930991] md: bind<sda1>
[   60.931038] md: bind<sdb1>
[   60.931085] md: running: <sdb1><sda1>
[   60.931223] raid1: raid set md0 active with 2 out of 2 mirrors
[   60.931889] md: ... autorun DONE.
[   60.956814] UDF-fs: No VRS found
[   60.957045] Filesystem "md2": Disabling barriers, not supported by the underlying device
[   60.969374] XFS mounting filesystem md2
[   61.038746] Ending clean XFS mount for filesystem: md2
[   61.038765] VFS: Mounted root (xfs filesystem) readonly.
[   61.038867] Freeing unused kernel memory: 216k freed
[   63.031810] Adding 16787768k swap on /dev/md0.  Priority:-1 extents:1 across:16787768k
[   63.085652] Filesystem "md2": Disabling barriers, not supported by the underlying device
[   63.876749] kjournald starting.  Commit interval 5 seconds
[   63.883153] EXT3 FS on md1, internal journal
[   63.883225] EXT3-fs: mounted filesystem with ordered data mode.
[   63.883591] Filesystem "md3": Disabling barriers, not supported by the underlying device
[   63.883845] XFS mounting filesystem md3
[   64.258308] Ending clean XFS mount for filesystem: md3
[   82.579367] e1000: eth1: e1000_watchdog: NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX
[   84.433542] process `syslogd' is using obsolete setsockopt SO_BSDCOMPAT
[  105.528789] mtrr: no more MTRRs available

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
  2007-06-07 22:50 ` Justin Piszcz
@ 2007-06-07 22:53 ` Justin Piszcz
  2007-06-07 23:00 ` Justin Piszcz
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-07 22:53 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

p34:/usr/src/linux# patch -p1 < ../mtrr-v2.patch
patching file Documentation/kernel-parameters.txt
patching file arch/i386/kernel/cpu/mtrr/generic.c
patching file arch/i386/kernel/cpu/mtrr/if.c
patching file arch/i386/kernel/cpu/mtrr/main.c
patching file arch/i386/kernel/cpu/mtrr/mtrr.h
patching file arch/x86_64/kernel/bugs.c
patching file arch/x86_64/kernel/setup.c
patching file include/asm-x86_64/mtrr.h
p34:/usr/src/linux#

Applies clean to 2.6.22-rc4, verifying shortly.

On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
>  - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
>  - move array declaration to header file as an extern
>  - add command line disable option "disable_mtrr_trim"
>  - don't run the trim code if the MTRR default type is cacheable
>  - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity?  If so, what granularity?
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> Thanks,
> Jesse
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 5d0283c..cb728a8 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
> 			See drivers/char/README.epca and
> 			Documentation/digiepca.txt.
>
> +	disable_mtrr_trim [X86-64]
> +			By default the kernel will trim any uncacheable
> +			memory out of your available memory pool based on
> +			MTRR settings.  This parameter disables that behavior,
> +			possibly causing your machine to run very slowly.
> +
> 	dmascc=		[HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
> 			support available.
> 			Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
> index c4ebb51..8eb3085 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
> #include "mtrr.h"
>
> struct mtrr_state {
> -	struct mtrr_var_range *var_ranges;
> +	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
> 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
> 	unsigned char enabled;
> 	unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
> 	struct mtrr_var_range *vrs;
> 	unsigned lo, dummy;
>
> -	if (!mtrr_state.var_ranges) {
> -		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
> -						GFP_KERNEL);
> -		if (!mtrr_state.var_ranges)
> -			return;
> -	}
> 	vrs = mtrr_state.var_ranges;
>
> 	rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..0e34a67 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -11,10 +11,6 @@
> #include <asm/mtrr.h>
> #include "mtrr.h"
>
> -/* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> -
> -
> #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
>
> static const char *const mtrr_strings[MTRR_NUM_TYPES] =
> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
> index 7202b98..ef552ba 100644
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -38,8 +38,8 @@
> #include <linux/cpu.h>
> #include <linux/mutex.h>
>
> +#include <asm/e820.h>
> #include <asm/mtrr.h>
> -
> #include <asm/uaccess.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> @@ -47,7 +47,7 @@
>
> u32 num_var_ranges = 0;
>
> -unsigned int *usage_table;
> +unsigned int usage_table[MAX_VAR_RANGES];
> static DEFINE_MUTEX(mtrr_mutex);
>
> u64 size_or_mask, size_and_mask;
> @@ -121,11 +121,6 @@ static void __init init_table(void)
> 	int i, max;
>
> 	max = num_var_ranges;
> -	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
> -	    == NULL) {
> -		printk(KERN_ERR "mtrr: could not allocate\n");
> -		return;
> -	}
> 	for (i = 0; i < max; i++)
> 		usage_table[i] = 1;
> }
> @@ -589,16 +584,11 @@ struct mtrr_value {
> 	unsigned long	lsize;
> };
>
> -static struct mtrr_value * mtrr_state;
> +static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
>
> static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
> {
> 	int i;
> -	int size = num_var_ranges * sizeof(struct mtrr_value);
> -
> -	mtrr_state = kzalloc(size,GFP_ATOMIC);
> -	if (!mtrr_state)
> -		return -ENOMEM;
>
> 	for (i = 0; i < num_var_ranges; i++) {
> 		mtrr_if->get(i,
> @@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
> 				 mtrr_state[i].lsize,
> 				 mtrr_state[i].ltype);
> 	}
> -	kfree(mtrr_state);
> 	return 0;
> }
>
> @@ -631,6 +620,57 @@ static struct sysdev_driver mtrr_sysdev_driver = {
> 	.resume		= mtrr_restore,
> };
>
> +static int disable_mtrr_trim;
> +
> +static int __init disable_mtrr_trim_setup(char *str)
> +{
> +	disable_mtrr_trim = 1;
> +	return 0;
> +}
> +early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
> +
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations.  This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> +	unsigned long i, base, size, highest_addr = 0, def, dummy;
> +	mtrr_type type;
> +
> +	/* Make sure we only trim uncachable memory on Intel machines */
> +	rdmsr(MTRRdefType_MSR, def, dummy);
> +	def &= 0xff;
> +	if (!use_intel() || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
> +		return;
> +
> +	/* Find highest cached pfn */
> +	for (i = 0; i < num_var_ranges; i++) {
> +		mtrr_if->get(i, &base, &size, &type);
> +		if (type != MTRR_TYPE_WRBACK)
> +			continue;
> +		base <<= PAGE_SHIFT;
> +		size <<= PAGE_SHIFT;
> +		if (highest_addr < base + size)
> +			highest_addr = base + size;
> +	}
> +
> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;
> +	}
> +}
>
> /**
>  * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..627b339 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
> #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>
> #define NUM_FIXED_RANGES 88
> +#define MAX_VAR_RANGES 256
> #define MTRRfix64K_00000_MSR 0x250
> #define MTRRfix16K_80000_MSR 0x258
> #define MTRRfix16K_A0000_MSR 0x259
> @@ -34,6 +35,8 @@
>    an 8 bit field: */
> typedef u8 mtrr_type;
>
> +extern unsigned int usage_table[MAX_VAR_RANGES];
> +
> struct mtrr_ops {
> 	u32	vendor;
> 	u32	use_intel_if;
> diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
> index c3c6b91..c138eac 100644
> --- a/arch/x86_64/kernel/bugs.c
> +++ b/arch/x86_64/kernel/bugs.c
> @@ -14,7 +14,6 @@
> void __init check_bugs(void)
> {
> 	identify_cpu(&boot_cpu_data);
> -	mtrr_bp_init();
> #if !defined(CONFIG_SMP)
> 	printk("CPU: ");
> 	print_cpu_info(&boot_cpu_data);
> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
> index eb6524f..409b63c 100644
> --- a/arch/x86_64/kernel/setup.c
> +++ b/arch/x86_64/kernel/setup.c
> @@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
> 	 * we are rounding upwards:
> 	 */
> 	end_pfn = e820_end_of_ram();
> +	/* Trim memory not covered by WB MTRRs */
> +	mtrr_bp_init();
> +	mtrr_trim_uncached_memory();
> +
> 	num_physpages = end_pfn;
>
> 	check_efer();
> diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
> index b557c48..cc62bd8 100644
> --- a/include/asm-x86_64/mtrr.h
> +++ b/include/asm-x86_64/mtrr.h
> @@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
> 		     unsigned int type, char increment);
> extern int mtrr_del (int reg, unsigned long base, unsigned long size);
> extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
> +extern void mtrr_trim_uncached_memory(void);
> #  else
> static __inline__ int mtrr_add (unsigned long base, unsigned long size,
> 				unsigned int type, char increment)
>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH] trim memory not covered by WB MTRRs
  2007-06-07 22:30 Jesse Barnes
@ 2007-06-07 22:50 ` Justin Piszcz
  2007-06-07 22:53 ` Justin Piszcz
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 118+ messages in thread
From: Justin Piszcz @ 2007-06-07 22:50 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Andi Kleen, linux-kernel, Eric W. Biederman

Will see if it still patches against -rc4.

On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached.  Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match.  A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
>  - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
>  - move array declaration to header file as an extern
>  - add command line disable option "disable_mtrr_trim"
>  - don't run the trim code if the MTRR default type is cacheable
>  - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity?  If so, what granularity?
>
> Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>
>
> Thanks,
> Jesse
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 5d0283c..cb728a8 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
> 			See drivers/char/README.epca and
> 			Documentation/digiepca.txt.
>
> +	disable_mtrr_trim [X86-64]
> +			By default the kernel will trim any uncacheable
> +			memory out of your available memory pool based on
> +			MTRR settings.  This parameter disables that behavior,
> +			possibly causing your machine to run very slowly.
> +
> 	dmascc=		[HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
> 			support available.
> 			Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
> index c4ebb51..8eb3085 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
> #include "mtrr.h"
>
> struct mtrr_state {
> -	struct mtrr_var_range *var_ranges;
> +	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
> 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
> 	unsigned char enabled;
> 	unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
> 	struct mtrr_var_range *vrs;
> 	unsigned lo, dummy;
>
> -	if (!mtrr_state.var_ranges) {
> -		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
> -						GFP_KERNEL);
> -		if (!mtrr_state.var_ranges)
> -			return;
> -	}
> 	vrs = mtrr_state.var_ranges;
>
> 	rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..0e34a67 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -11,10 +11,6 @@
> #include <asm/mtrr.h>
> #include "mtrr.h"
>
> -/* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> -
> -
> #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
>
> static const char *const mtrr_strings[MTRR_NUM_TYPES] =
> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
> index 7202b98..ef552ba 100644
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -38,8 +38,8 @@
> #include <linux/cpu.h>
> #include <linux/mutex.h>
>
> +#include <asm/e820.h>
> #include <asm/mtrr.h>
> -
> #include <asm/uaccess.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> @@ -47,7 +47,7 @@
>
> u32 num_var_ranges = 0;
>
> -unsigned int *usage_table;
> +unsigned int usage_table[MAX_VAR_RANGES];
> static DEFINE_MUTEX(mtrr_mutex);
>
> u64 size_or_mask, size_and_mask;
> @@ -121,11 +121,6 @@ static void __init init_table(void)
> 	int i, max;
>
> 	max = num_var_ranges;
> -	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
> -	    == NULL) {
> -		printk(KERN_ERR "mtrr: could not allocate\n");
> -		return;
> -	}
> 	for (i = 0; i < max; i++)
> 		usage_table[i] = 1;
> }
> @@ -589,16 +584,11 @@ struct mtrr_value {
> 	unsigned long	lsize;
> };
>
> -static struct mtrr_value * mtrr_state;
> +static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
>
> static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
> {
> 	int i;
> -	int size = num_var_ranges * sizeof(struct mtrr_value);
> -
> -	mtrr_state = kzalloc(size,GFP_ATOMIC);
> -	if (!mtrr_state)
> -		return -ENOMEM;
>
> 	for (i = 0; i < num_var_ranges; i++) {
> 		mtrr_if->get(i,
> @@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
> 				 mtrr_state[i].lsize,
> 				 mtrr_state[i].ltype);
> 	}
> -	kfree(mtrr_state);
> 	return 0;
> }
>
> @@ -631,6 +620,57 @@ static struct sysdev_driver mtrr_sysdev_driver = {
> 	.resume		= mtrr_restore,
> };
>
> +static int disable_mtrr_trim;
> +
> +static int __init disable_mtrr_trim_setup(char *str)
> +{
> +	disable_mtrr_trim = 1;
> +	return 0;
> +}
> +early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
> +
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations.  This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> +	unsigned long i, base, size, highest_addr = 0, def, dummy;
> +	mtrr_type type;
> +
> +	/* Make sure we only trim uncachable memory on Intel machines */
> +	rdmsr(MTRRdefType_MSR, def, dummy);
> +	def &= 0xff;
> +	if (!use_intel() || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
> +		return;
> +
> +	/* Find highest cached pfn */
> +	for (i = 0; i < num_var_ranges; i++) {
> +		mtrr_if->get(i, &base, &size, &type);
> +		if (type != MTRR_TYPE_WRBACK)
> +			continue;
> +		base <<= PAGE_SHIFT;
> +		size <<= PAGE_SHIFT;
> +		if (highest_addr < base + size)
> +			highest_addr = base + size;
> +	}
> +
> +	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> +		printk(KERN_WARNING "***************\n");
> +		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> +		printk(KERN_WARNING "**** MTRRs don't cover all of "
> +		       "memory, trimmed %ld pages\n", end_pfn -
> +		       (highest_addr >> PAGE_SHIFT));
> +		printk(KERN_WARNING "***************\n");
> +		end_pfn = highest_addr >> PAGE_SHIFT;
> +	}
> +}
>
> /**
>  * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..627b339 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
> #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>
> #define NUM_FIXED_RANGES 88
> +#define MAX_VAR_RANGES 256
> #define MTRRfix64K_00000_MSR 0x250
> #define MTRRfix16K_80000_MSR 0x258
> #define MTRRfix16K_A0000_MSR 0x259
> @@ -34,6 +35,8 @@
>    an 8 bit field: */
> typedef u8 mtrr_type;
>
> +extern unsigned int usage_table[MAX_VAR_RANGES];
> +
> struct mtrr_ops {
> 	u32	vendor;
> 	u32	use_intel_if;
> diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
> index c3c6b91..c138eac 100644
> --- a/arch/x86_64/kernel/bugs.c
> +++ b/arch/x86_64/kernel/bugs.c
> @@ -14,7 +14,6 @@
> void __init check_bugs(void)
> {
> 	identify_cpu(&boot_cpu_data);
> -	mtrr_bp_init();
> #if !defined(CONFIG_SMP)
> 	printk("CPU: ");
> 	print_cpu_info(&boot_cpu_data);
> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
> index eb6524f..409b63c 100644
> --- a/arch/x86_64/kernel/setup.c
> +++ b/arch/x86_64/kernel/setup.c
> @@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
> 	 * we are rounding upwards:
> 	 */
> 	end_pfn = e820_end_of_ram();
> +	/* Trim memory not covered by WB MTRRs */
> +	mtrr_bp_init();
> +	mtrr_trim_uncached_memory();
> +
> 	num_physpages = end_pfn;
>
> 	check_efer();
> diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
> index b557c48..cc62bd8 100644
> --- a/include/asm-x86_64/mtrr.h
> +++ b/include/asm-x86_64/mtrr.h
> @@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
> 		     unsigned int type, char increment);
> extern int mtrr_del (int reg, unsigned long base, unsigned long size);
> extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
> +extern void mtrr_trim_uncached_memory(void);
> #  else
> static __inline__ int mtrr_add (unsigned long base, unsigned long size,
> 				unsigned int type, char increment)
>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* [PATCH] trim memory not covered by WB MTRRs
@ 2007-06-07 22:30 Jesse Barnes
  2007-06-07 22:50 ` Justin Piszcz
                   ` (6 more replies)
  0 siblings, 7 replies; 118+ messages in thread
From: Jesse Barnes @ 2007-06-07 22:30 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: Justin Piszcz, Eric W. Biederman

On some machines, buggy BIOSes don't properly setup WB MTRRs to
cover all available RAM, meaning the last few megs (or even gigs)
of memory will be marked uncached.  Since Linux tends to allocate
from high memory addresses first, this causes the machine to be
unusably slow as soon as the kernel starts really using memory
(i.e. right around init time).

This patch works around the problem by scanning the MTRRs at
boot and figuring out whether the current end_pfn value (setup
by early e820 code) goes beyond the highest WB MTRR range, and
if so, trimming it to match.  A fairly obnoxious KERN_WARNING
is printed too, letting the user know that not all of their
memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot
ordering would be slightly different, since the MTRR code on i386
depends on the boot_cpu_data structure being setup.

This patch incorporates the feedback from Eric and Andi:
  - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
  - move array declaration to header file as an extern
  - add command line disable option "disable_mtrr_trim"
  - don't run the trim code if the MTRR default type is cacheable
  - don't run the trim code on non-Intel machines

Justin, feel free to test again if you have time and add your
"Tested-by" signoff.

Andi, as for large pages, do you think this is ok as is, or should
I trim a larger granularity?  If so, what granularity?

Signed-off-by:  Jesse Barnes <jesse.barnes@intel.com>

Thanks,
Jesse

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5d0283c..cb728a8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_trim [X86-64]
+			By default the kernel will trim any uncacheable
+			memory out of your available memory pool based on
+			MTRR settings.  This parameter disables that behavior,
+			possibly causing your machine to run very slowly.
+
 	dmascc=		[HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
 			support available.
 			Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index c4ebb51..8eb3085 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -13,7 +13,7 @@
 #include "mtrr.h"
 
 struct mtrr_state {
-	struct mtrr_var_range *var_ranges;
+	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
 	unsigned char enabled;
 	unsigned char have_fixed;
@@ -84,12 +84,6 @@ void get_mtrr_state(void)
 	struct mtrr_var_range *vrs;
 	unsigned lo, dummy;
 
-	if (!mtrr_state.var_ranges) {
-		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
-						GFP_KERNEL);
-		if (!mtrr_state.var_ranges)
-			return;
-	} 
 	vrs = mtrr_state.var_ranges;
 
 	rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index c7d8f17..0e34a67 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
 #include <asm/mtrr.h>
 #include "mtrr.h"
 
-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
 #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
 
 static const char *const mtrr_strings[MTRR_NUM_TYPES] =
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 7202b98..ef552ba 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 
+#include <asm/e820.h>
 #include <asm/mtrr.h>
-
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -47,7 +47,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int *usage_table;
+unsigned int usage_table[MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -121,11 +121,6 @@ static void __init init_table(void)
 	int i, max;
 
 	max = num_var_ranges;
-	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
-	    == NULL) {
-		printk(KERN_ERR "mtrr: could not allocate\n");
-		return;
-	}
 	for (i = 0; i < max; i++)
 		usage_table[i] = 1;
 }
@@ -589,16 +584,11 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
 	int i;
-	int size = num_var_ranges * sizeof(struct mtrr_value);
-
-	mtrr_state = kzalloc(size,GFP_ATOMIC);
-	if (!mtrr_state)
-		return -ENOMEM;
 
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i,
@@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
 				 mtrr_state[i].lsize,
 				 mtrr_state[i].ltype);
 	}
-	kfree(mtrr_state);
 	return 0;
 }
 
@@ -631,6 +620,57 @@ static struct sysdev_driver mtrr_sysdev_driver = {
 	.resume		= mtrr_restore,
 };
 
+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+	disable_mtrr_trim = 1;
+	return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations.  This routine checks to make sure the MTRRs having
+ * a write back type cover all of the memory the kernel is intending to use.
+ * If not, it'll trim any memory off the end by adjusting end_pfn, removing
+ * it from the kernel's allocation pools, warning the user with an obnoxious
+ * message.
+ */
+void __init mtrr_trim_uncached_memory(void)
+{
+	unsigned long i, base, size, highest_addr = 0, def, dummy;
+	mtrr_type type;
+
+	/* Make sure we only trim uncachable memory on Intel machines */
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (!use_intel() || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
+		return;
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base <<= PAGE_SHIFT;
+		size <<= PAGE_SHIFT;
+		if (highest_addr < base + size)
+			highest_addr = base + size;
+	}
+
+	if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
+		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
+		printk(KERN_WARNING "**** MTRRs don't cover all of "
+		       "memory, trimmed %ld pages\n", end_pfn -
+		       (highest_addr >> PAGE_SHIFT));
+		printk(KERN_WARNING "***************\n");
+		end_pfn = highest_addr >> PAGE_SHIFT;
+	}
+}
 
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index 289dfe6..627b339 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -14,6 +14,7 @@
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
 #define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -34,6 +35,8 @@
    an 8 bit field: */
 typedef u8 mtrr_type;
 
+extern unsigned int usage_table[MAX_VAR_RANGES];
+
 struct mtrr_ops {
 	u32	vendor;
 	u32	use_intel_if;
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
index c3c6b91..c138eac 100644
--- a/arch/x86_64/kernel/bugs.c
+++ b/arch/x86_64/kernel/bugs.c
@@ -14,7 +14,6 @@
 void __init check_bugs(void)
 {
 	identify_cpu(&boot_cpu_data);
-	mtrr_bp_init();
 #if !defined(CONFIG_SMP)
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index eb6524f..409b63c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
 	 * we are rounding upwards:
 	 */
 	end_pfn = e820_end_of_ram();
+	/* Trim memory not covered by WB MTRRs */
+	mtrr_bp_init();
+	mtrr_trim_uncached_memory();
+
 	num_physpages = end_pfn;
 
 	check_efer();
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
index b557c48..cc62bd8 100644
--- a/include/asm-x86_64/mtrr.h
+++ b/include/asm-x86_64/mtrr.h
@@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
 		     unsigned int type, char increment);
 extern int mtrr_del (int reg, unsigned long base, unsigned long size);
 extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_trim_uncached_memory(void);
 #  else
 static __inline__ int mtrr_add (unsigned long base, unsigned long size,
 				unsigned int type, char increment)

^ permalink raw reply related	[flat|nested] 118+ messages in thread

end of thread, other threads:[~2007-07-05 12:16 UTC | newest]

Thread overview: 118+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-06 19:29 [PATCH] trim memory not covered by WB MTRRs Jesse Barnes
2007-06-06 20:26 ` Justin Piszcz
2007-06-06 20:28   ` Jesse Barnes
2007-06-06 20:31     ` Jesse Barnes
2007-06-06 20:37       ` Justin Piszcz
2007-06-06 20:50         ` Jesse Barnes
2007-06-06 21:26           ` Justin Piszcz
2007-06-06 21:53 ` Justin Piszcz
2007-06-06 22:03   ` Justin Piszcz
2007-06-06 22:05     ` Jesse Barnes
2007-06-06 22:07       ` Justin Piszcz
2007-06-06 22:13       ` Justin Piszcz
2007-06-06 22:24         ` Jesse Barnes
2007-06-06 22:26           ` Justin Piszcz
2007-06-06 22:28             ` Jesse Barnes
2007-06-06 22:31               ` Justin Piszcz
2007-06-06 22:35               ` Justin Piszcz
2007-06-06 22:37               ` Randy Dunlap
2007-06-06 22:46                 ` Justin Piszcz
2007-06-06 22:54                 ` Justin Piszcz
2007-06-06 23:11                   ` Randy Dunlap
2007-06-06 23:15                     ` Justin Piszcz
2007-06-06 23:34                       ` Jesse Barnes
2007-06-07  8:10                         ` Justin Piszcz
2007-06-06 22:39               ` Justin Piszcz
2007-06-06 22:57               ` Justin Piszcz
2007-06-06 23:20                 ` Jesse Barnes
2007-06-06 23:24                   ` Justin Piszcz
2007-06-06 23:27                     ` Jesse Barnes
2007-06-07  8:51                       ` Andi Kleen
2007-06-07  8:53                         ` Justin Piszcz
2007-06-07  9:55                         ` Satyam Sharma
2007-06-07 17:33                         ` Jesse Barnes
2007-06-07  7:45 ` Eric W. Biederman
2007-06-07 17:30   ` Jesse Barnes
2007-06-08 23:13     ` Eric W. Biederman
2007-06-12 15:39       ` Jesse Barnes
2007-06-07  8:16 ` Andi Kleen
2007-06-07 17:35   ` Jesse Barnes
2007-06-07 17:40     ` Justin Piszcz
2007-06-07 14:41 ` Pavel Machek
2007-06-08  0:20 ` Andrew Morton
2007-06-08  1:33   ` Jesse Barnes
2007-06-08 21:15 ` Andrew Morton
2007-06-08 21:28   ` Jesse Barnes
2007-06-13  1:11 ` Eric W. Biederman
2007-06-13  2:29   ` Jesse Barnes
2007-06-13 22:19     ` Eric W. Biederman
2007-06-20 11:22 ` Helge Hafting
2007-06-20 14:37   ` Andi Kleen
2007-06-07 22:30 Jesse Barnes
2007-06-07 22:50 ` Justin Piszcz
2007-06-07 22:53 ` Justin Piszcz
2007-06-07 23:00 ` Justin Piszcz
2007-06-08  8:20 ` Justin Piszcz
2007-06-12 14:50 ` Pavel Machek
2007-06-12 15:29   ` Jesse Barnes
2007-06-12 15:48     ` Andi Kleen
2007-06-12 21:30     ` Pavel Machek
2007-06-12 21:31       ` Justin Piszcz
2007-06-12 21:38       ` Ray Lee
2007-06-12 21:55         ` Pavel Machek
2007-06-13  0:25           ` Ray Lee
2007-06-13  8:22             ` Pavel Machek
2007-06-14 19:38 ` Pim Zandbergen
2007-06-14 20:26   ` Justin Piszcz
2007-06-14 21:18     ` Jesse Barnes
2007-06-14 21:21       ` Justin Piszcz
2007-06-14 21:26         ` Jesse Barnes
2007-06-15 10:21       ` Pim Zandbergen
2007-06-15 16:20         ` Jesse Barnes
2007-06-21 14:24           ` Pim Zandbergen
2007-06-21 14:28             ` Justin Piszcz
2007-06-25 16:31             ` Pim Zandbergen
2007-06-25 16:34               ` Justin Piszcz
2007-06-15 10:17     ` Pim Zandbergen
2007-06-15 10:34       ` Justin Piszcz
2007-06-15 17:28       ` Jesse Barnes
2007-06-20 13:55         ` Pim Zandbergen
2007-06-21 19:40 ` Yinghai Lu
2007-06-21 19:56   ` Jesse Barnes
     [not found] <fa.i7vJP3lxWAlyOLjcsqOWPKlixD8@ifi.uio.no>
     [not found] ` <fa.3ijVoClbWNHWrMhDABWjNPxp+wo@ifi.uio.no>
     [not found]   ` <fa.ZqgSvRGj/scOmd0AwnU6e21Gcwc@ifi.uio.no>
     [not found]     ` <fa.oNsjw768fkDpx3oef91fjAQs1Iw@ifi.uio.no>
     [not found]       ` <fa.x8ZCt4n0yXI1llhRq4wfjNfqK4w@ifi.uio.no>
2007-06-08  1:57         ` Robert Hancock
     [not found] <8tyOc-8f0-17@gated-at.bofh.it>
2007-06-13  6:52 ` Bodo Eggert
2007-06-13 16:19   ` Dave Jones
2007-06-25 21:34 Jesse Barnes
2007-06-25 21:45 ` Justin Piszcz
2007-06-25 22:01 ` Andrew Morton
2007-06-25 22:05   ` Jesse Barnes
2007-06-25 22:29     ` Justin Piszcz
2007-06-25 23:34 ` Andi Kleen
2007-06-25 23:36   ` Jesse Barnes
2007-06-26  0:54     ` Eric W. Biederman
2007-06-26  3:29       ` Jesse Barnes
2007-06-26  3:30         ` Jesse Barnes
2007-06-26 15:03           ` Andi Kleen
2007-06-26 15:07             ` Jesse Barnes
2007-06-26 15:18               ` Jesse Barnes
2007-06-26 15:39       ` Andi Kleen
2007-06-26 15:54         ` Yinghai Lu
2007-06-26 16:06         ` Eric W. Biederman
2007-06-26 17:38           ` Andi Kleen
2007-06-26 18:55             ` Yinghai Lu
2007-06-26 15:02     ` Andi Kleen
2007-06-26 15:38       ` Jesse Barnes
2007-06-27 10:44 ` Pim Zandbergen
2007-06-27 11:22   ` Andi Kleen
2007-06-27 11:40     ` Pim Zandbergen
2007-06-27 11:44       ` Justin Piszcz
2007-06-27 14:22   ` Mauro Giachero
2007-06-27 15:04     ` Jesse Barnes
2007-06-27 16:00       ` Pim Zandbergen
2007-06-27 16:07         ` Jesse Barnes
2007-06-27 16:22         ` Jesse Barnes
2007-06-27 17:02           ` Pim Zandbergen
2007-06-27 17:06             ` Jesse Barnes
2007-06-27 17:17               ` Pim Zandbergen
2007-07-05 12:12 ` Pavel Machek
2007-07-05 12:16   ` Justin Piszcz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).