linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC] x86: check for and defend against BIOS memory corruption
@ 2008-08-28 19:52 Jeremy Fitzhardinge
  2008-08-29  1:49 ` Yinghai Lu
                   ` (2 more replies)
  0 siblings, 3 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-28 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Rafał Miłecki, Alan Jenkins, Hugh Dickens,
	H. Peter Anvin
  Cc: Linux Kernel Mailing List

Some BIOSes have been observed to corrupt memory in the low 64k.  This
patch does two things:
 - Reserves all memory which does not have to be in that area, to
   prevent it from being used as general memory by the kernel.  Things
   like the SMP trampoline are still in the memory, however.
 - Clears the reserved memory so we can observe changes to it.
 - Adds a function check_for_bios_corruption() which checks and reports on
   memory becoming unexpectedly non-zero.  Currently it's called in the
   x86 fault handler, and the powermanagement debug output.

RFC: What other places should we check for corruption in?

[ Alan, Rafał: could you check you see:
   1: corruption messages
   2: no crashes
  Thanks -J
]

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Rafał Miłecki <zajec5@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
---
 Documentation/kernel-parameters.txt |    5 ++
 arch/x86/Kconfig                    |    3 +
 arch/x86/kernel/setup.c             |   86 +++++++++++++++++++++++++++++++++++
 arch/x86/mm/fault.c                 |    2 
 drivers/base/power/main.c           |    1 
 include/linux/kernel.h              |   12 ++++
 6 files changed, 109 insertions(+)

===================================================================
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -359,6 +359,11 @@
 			BayCom Serial Port AX.25 Modem (Half Duplex Mode)
 			Format: <io>,<irq>,<mode>
 			See header of drivers/net/hamradio/baycom_ser_hdx.c.
+
+	bios_corruption_check=0/1 [X86]
+			Some BIOSes seem to corrupt the first 64k of memory
+			when doing things like suspend/resume.  Setting this
+			option will scan the memory looking for corruption.
 
 	boot_delay=	Milliseconds to delay each printk during boot.
 			Values larger than 10 seconds (10000) are changed to
===================================================================
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -203,6 +203,9 @@
 	bool
 	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
+
+config X86_CHECK_BIOS_CORRUPTION
+        def_bool y
 
 config KTIME_SCALAR
 	def_bool X86_32
===================================================================
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -582,6 +582,88 @@
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
 /*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable.  Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+#define MAX_SCAN_AREAS	8
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+static void __init setup_bios_corruption_check(void)
+{
+	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
+
+	while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
+		u64 size;
+		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+		if (addr == 0)
+			break;
+
+		if ((addr + size) > 0x10000)
+			size = 0x10000 - addr;
+
+		if (size == 0)
+			break;
+
+		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+		scan_areas[num_scan_areas].addr = addr;
+		scan_areas[num_scan_areas].size = size;
+		num_scan_areas++;
+
+		/* Assume we've already mapped this early memory */
+		memset(__va(addr), 0, size);
+
+		addr += size;
+	}
+
+	printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
+	       num_scan_areas);
+	update_e820();
+}
+
+static int __read_mostly bios_corruption_check = 1;
+
+void check_for_bios_corruption(void)
+{
+	int i;
+	int corruption = 0;
+
+	if (!bios_corruption_check)
+		return;
+
+	for(i = 0; i < num_scan_areas; i++) {
+		unsigned long *addr = __va(scan_areas[i].addr);
+		unsigned long size = scan_areas[i].size;
+
+		for(; size; addr++, size--) {
+			if (!*addr)
+				continue;
+			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
+			       addr, __pa(addr), *addr);
+			corruption = 1;
+		}
+	}
+
+	if (corruption)
+		dump_stack();
+}
+
+static int set_bios_corruption_check(char *arg)
+{
+	char *end;
+
+	bios_corruption_check = simple_strtol(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("bios_corruption_check", set_bios_corruption_check);
+#endif
+
+/*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
  * for initialization.  Note, the efi init code path is determined by the
@@ -766,6 +848,10 @@
 		max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+#endif
+
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+	setup_bios_corruption_check();
 #endif
 
 	/* max_pfn_mapped is updated here */
===================================================================
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -864,6 +864,8 @@
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
  */
+	check_for_bios_corruption();
+
 #ifdef CONFIG_X86_32
 	bust_spinlocks(1);
 #else
===================================================================
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -254,6 +254,7 @@
 
 static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
 {
+	check_for_bios_corruption();
 	dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
 		((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
 		", may wakeup" : "");
===================================================================
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -246,6 +246,18 @@
 extern void add_taint(unsigned);
 extern int root_mountflags;
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+/*
+ * This is obviously not a great place for this, but we want to be
+ * able to scatter it around anywhere in the kernel.
+ */
+void check_for_bios_corruption(void);
+#else
+static inline void check_for_bios_corruption(void)
+{
+}
+#endif
+
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-28 19:52 [PATCH RFC] x86: check for and defend against BIOS memory corruption Jeremy Fitzhardinge
@ 2008-08-29  1:49 ` Yinghai Lu
  2008-08-29  3:28   ` Jeremy Fitzhardinge
  2008-08-29  6:20 ` Rafał Miłecki
  2008-08-29  8:14 ` Hugh Dickins
  2 siblings, 1 reply; 35+ messages in thread
From: Yinghai Lu @ 2008-08-29  1:49 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Rafał Miłecki, Alan Jenkins, Hugh Dickens,
	H. Peter Anvin, Linux Kernel Mailing List

On Thu, Aug 28, 2008 at 12:52 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:> Some BIOSes have been observed to corrupt memory in the low 64k.  This> patch does two things:>  - Reserves all memory which does not have to be in that area, to>   prevent it from being used as general memory by the kernel.  Things>   like the SMP trampoline are still in the memory, however.>  - Clears the reserved memory so we can observe changes to it.>  - Adds a function check_for_bios_corruption() which checks and reports on>   memory becoming unexpectedly non-zero.  Currently it's called in the>   x86 fault handler, and the powermanagement debug output.>> RFC: What other places should we check for corruption in?>> [ Alan, Rafał: could you check you see:>   1: corruption messages>   2: no crashes>  Thanks -J> ]>> Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>> Cc: Alan Jenkins <alan-jenkins@tuffmail.co.uk>> Cc: Hugh Dickens <hugh@veritas.com>> Cc: Ingo Molnar <mingo@elte.hu>> Cc: Rafael J. Wysocki <rjw@sisk.pl>> Cc: Rafał Miłecki <zajec5@gmail.com>> Cc: H. Peter Anvin <hpa@zytor.com>> --->  Documentation/kernel-parameters.txt |    5 ++>  arch/x86/Kconfig                    |    3 +>  arch/x86/kernel/setup.c             |   86 +++++++++++++++++++++++++++++++++++>  arch/x86/mm/fault.c                 |    2>  drivers/base/power/main.c           |    1>  include/linux/kernel.h              |   12 ++++>  6 files changed, 109 insertions(+)>> ===================================================================> --- a/Documentation/kernel-parameters.txt> +++ b/Documentation/kernel-parameters.txt> @@ -359,6 +359,11 @@>                        BayCom Serial Port AX.25 Modem (Half Duplex Mode)>                        Format: <io>,<irq>,<mode>>                        See header of drivers/net/hamradio/baycom_ser_hdx.c.> +> +       bios_corruption_check=0/1 [X86]> +                       Some BIOSes seem to corrupt the first 64k of memory> +                       when doing things like suspend/resume.  Setting this> +                       option will scan the memory looking for corruption.>>        boot_delay=     Milliseconds to delay each printk during boot.>                        Values larger than 10 seconds (10000) are changed to> ===================================================================> --- a/arch/x86/Kconfig> +++ b/arch/x86/Kconfig> @@ -203,6 +203,9 @@>        bool>        depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)>        default y> +> +config X86_CHECK_BIOS_CORRUPTION> +        def_bool y>>  config KTIME_SCALAR>        def_bool X86_32> ===================================================================> --- a/arch/x86/kernel/setup.c> +++ b/arch/x86/kernel/setup.c> @@ -582,6 +582,88 @@>  struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;>>  /*> + * Some BIOSes seem to corrupt the low 64k of memory during events> + * like suspend/resume and unplugging an HDMI cable.  Reserve all> + * remaining free memory in that area and fill it with a distinct> + * pattern.> + */> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION> +#define MAX_SCAN_AREAS 8> +static struct e820entry scan_areas[MAX_SCAN_AREAS];> +static int num_scan_areas;> +> +static void __init setup_bios_corruption_check(void)> +{> +       u64 addr = PAGE_SIZE;   /* assume first page is reserved anyway */> +
can you please not punish systems without this bios problem?
if (!bios_corruption_check)   return;
> +       while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {> +               u64 size;> +               addr = find_e820_area_size(addr, &size, PAGE_SIZE);> +> +               if (addr == 0)> +                       break;> +> +               if ((addr + size) > 0x10000)> +                       size = 0x10000 - addr;> +> +               if (size == 0)> +                       break;> +> +               e820_update_range(addr, size, E820_RAM, E820_RESERVED);> +               scan_areas[num_scan_areas].addr = addr;> +               scan_areas[num_scan_areas].size = size;> +               num_scan_areas++;> +> +               /* Assume we've already mapped this early memory */> +               memset(__va(addr), 0, size);> +> +               addr += size;> +       }> +> +       printk(KERN_INFO "scanning %d areas for BIOS corruption\n",> +              num_scan_areas);> +       update_e820();> +}> +> +static int __read_mostly bios_corruption_check = 1;
move earlier andbios_corruption_check = 0;
BTW: SMI evil damaged that area?
YH˙ôčş{.nÇ+‰ˇŸŽ‰­†+%ŠË˙ąéÝś\x17ĽŠw˙ş{.nÇ+‰ˇĽŠ{ąţGŤé˙Š{ayş\x1dʇڙë,j\a­˘fŁ˘ˇhšďę˙‘ęçz_čŽ\x03(­éšŽŠÝ˘j"ú\x1aś^[m§˙˙ž\aŤţGŤé˙˘¸?™¨č­Ú&Łř§~áśiO•ćŹzˇšvŘ^\x14\x04\x1aś^[m§˙˙Ă\f˙śě˙˘¸?–IĽ

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  1:49 ` Yinghai Lu
@ 2008-08-29  3:28   ` Jeremy Fitzhardinge
  2008-08-29  9:25     ` Alan Cox
  2008-08-29 20:31     ` Kasper Sandberg
  0 siblings, 2 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29  3:28 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Rafał Miłecki, Alan Jenkins, Hugh Dickens,
	H. Peter Anvin, Linux Kernel Mailing List

Yinghai Lu wrote:
> On Thu, Aug 28, 2008 at 12:52 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>   
>> Some BIOSes have been observed to corrupt memory in the low 64k.  This
>> patch does two things:
>>  - Reserves all memory which does not have to be in that area, to
>>   prevent it from being used as general memory by the kernel.  Things
>>   like the SMP trampoline are still in the memory, however.
>>  - Clears the reserved memory so we can observe changes to it.
>>  - Adds a function check_for_bios_corruption() which checks and reports on
>>   memory becoming unexpectedly non-zero.  Currently it's called in the
>>   x86 fault handler, and the powermanagement debug output.
>>
>> RFC: What other places should we check for corruption in?
>>
>> [ Alan, Rafał: could you check you see:
>>   1: corruption messages
>>   2: no crashes
>>  Thanks -J
>> ]
>>
>> Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
>> Cc: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
>> Cc: Hugh Dickens <hugh@veritas.com>
>> Cc: Ingo Molnar <mingo@elte.hu>
>> Cc: Rafael J. Wysocki <rjw@sisk.pl>
>> Cc: Rafał Miłecki <zajec5@gmail.com>
>> Cc: H. Peter Anvin <hpa@zytor.com>
>> ---
>>  Documentation/kernel-parameters.txt |    5 ++
>>  arch/x86/Kconfig                    |    3 +
>>  arch/x86/kernel/setup.c             |   86 +++++++++++++++++++++++++++++++++++
>>  arch/x86/mm/fault.c                 |    2
>>  drivers/base/power/main.c           |    1
>>  include/linux/kernel.h              |   12 ++++
>>  6 files changed, 109 insertions(+)
>>
>> ===================================================================
>> --- a/Documentation/kernel-parameters.txt
>> +++ b/Documentation/kernel-parameters.txt
>> @@ -359,6 +359,11 @@
>>                        BayCom Serial Port AX.25 Modem (Half Duplex Mode)
>>                        Format: <io>,<irq>,<mode>
>>                        See header of drivers/net/hamradio/baycom_ser_hdx.c.
>> +
>> +       bios_corruption_check=0/1 [X86]
>> +                       Some BIOSes seem to corrupt the first 64k of memory
>> +                       when doing things like suspend/resume.  Setting this
>> +                       option will scan the memory looking for corruption.
>>
>>        boot_delay=     Milliseconds to delay each printk during boot.
>>                        Values larger than 10 seconds (10000) are changed to
>> ===================================================================
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -203,6 +203,9 @@
>>        bool
>>        depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
>>        default y
>> +
>> +config X86_CHECK_BIOS_CORRUPTION
>> +        def_bool y
>>
>>  config KTIME_SCALAR
>>        def_bool X86_32
>> ===================================================================
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -582,6 +582,88 @@
>>  struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
>>
>>  /*
>> + * Some BIOSes seem to corrupt the low 64k of memory during events
>> + * like suspend/resume and unplugging an HDMI cable.  Reserve all
>> + * remaining free memory in that area and fill it with a distinct
>> + * pattern.
>> + */
>> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
>> +#define MAX_SCAN_AREAS 8
>> +static struct e820entry scan_areas[MAX_SCAN_AREAS];
>> +static int num_scan_areas;
>> +
>> +static void __init setup_bios_corruption_check(void)
>> +{
>> +       u64 addr = PAGE_SIZE;   /* assume first page is reserved anyway */
>> +
>>     
>
> can you please not punish systems without this bios problem?
>
> if (!bios_corruption_check)
>    return;
>   

Yeah, OK, but I think it should default to ON for now.  The problem is
that we had two very different systems (Sony Vaio and Intel desktop)
exhibit the same problem in two different ways.  These systems worked
fine until we slightly changed the way that pagetable memory was
allocated.  We just don't know how many other systems are doing this
kind of subtle corruption of low memory.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-28 19:52 [PATCH RFC] x86: check for and defend against BIOS memory corruption Jeremy Fitzhardinge
  2008-08-29  1:49 ` Yinghai Lu
@ 2008-08-29  6:20 ` Rafał Miłecki
  2008-08-29  6:45   ` Ingo Molnar
  2008-08-29  7:22   ` Jeremy Fitzhardinge
  2008-08-29  8:14 ` Hugh Dickins
  2 siblings, 2 replies; 35+ messages in thread
From: Rafał Miłecki @ 2008-08-29  6:20 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Alan Jenkins, Hugh Dickens, H. Peter Anvin,
	Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 1205 bytes --]

2008/8/28 Jeremy Fitzhardinge <jeremy@goop.org>:> Some BIOSes have been observed to corrupt memory in the low 64k.  This> patch does two things:>  - Reserves all memory which does not have to be in that area, to>   prevent it from being used as general memory by the kernel.  Things>   like the SMP trampoline are still in the memory, however.>  - Clears the reserved memory so we can observe changes to it.>  - Adds a function check_for_bios_corruption() which checks and reports on>   memory becoming unexpectedly non-zero.  Currently it's called in the>   x86 fault handler, and the powermanagement debug output.>> RFC: What other places should we check for corruption in?>> [ Alan, Rafał: could you check you see:>   1: corruption messages>   2: no crashes>  Thanks -J> ]
I was trying my best to crash system with this patch applied and failed :)
Works great.
Just wonder if I should expect any printk fromcheck_for_bios_corruption? I do not see any:
zajec@sony:~> dmesg | grep -i corrscanning 2 areas for BIOS corruption
-- RafaÅ‚ MiÅ‚eckiÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  6:20 ` Rafał Miłecki
@ 2008-08-29  6:45   ` Ingo Molnar
  2008-08-29  7:21     ` Jeremy Fitzhardinge
  2008-08-29  7:22   ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 35+ messages in thread
From: Ingo Molnar @ 2008-08-29  6:45 UTC (permalink / raw)
  To: Rafał Miłecki
  Cc: Jeremy Fitzhardinge, Alan Jenkins, Hugh Dickens, H. Peter Anvin,
	Linux Kernel Mailing List


* Rafał Miłecki <zajec5@gmail.com> wrote:

> 2008/8/28 Jeremy Fitzhardinge <jeremy@goop.org>:
> > Some BIOSes have been observed to corrupt memory in the low 64k.  This
> > patch does two things:
> >  - Reserves all memory which does not have to be in that area, to
> >   prevent it from being used as general memory by the kernel.  Things
> >   like the SMP trampoline are still in the memory, however.
> >  - Clears the reserved memory so we can observe changes to it.
> >  - Adds a function check_for_bios_corruption() which checks and reports on
> >   memory becoming unexpectedly non-zero.  Currently it's called in the
> >   x86 fault handler, and the powermanagement debug output.
> >
> > RFC: What other places should we check for corruption in?
> >
> > [ Alan, Rafał: could you check you see:
> >   1: corruption messages
> >   2: no crashes
> >  Thanks -J
> > ]
> 
> I was trying my best to crash system with this patch applied and failed :)
> 
> Works great.
> 
> Just wonder if I should expect any printk from
> check_for_bios_corruption? I do not see any:
> 
> zajec@sony:~> dmesg | grep -i corr
> scanning 2 areas for BIOS corruption

that's _very_ weird.

maybe the BIOS expects _zeroes_ somewhere? Do you suddenly see crashes 
if you change this line in Jeremy's patch:

+               memset(__va(addr), 0, size);

to something like:

+               memset(__va(addr), 0x55, size);

If this does not tickle any messages either, then maybe the problem is 
in the identity of the entities we allocate in the first 64K. Is there a 
list of allocations that go there when Jeremy's patch is not applied?

but ... i think with an earlier patch you saw corruption, right? 
Far-fetched idea: maybe it's some CPU erratum during suspend/resume that 
corrupts pagetables if the pagetables are allocated in the first 64K of 
RAM? In that case we should use a bootmem allocation for pagetables that 
give a minimum address of 64K.

	Ingo

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  6:45   ` Ingo Molnar
@ 2008-08-29  7:21     ` Jeremy Fitzhardinge
  2008-08-29  7:30       ` Ingo Molnar
  0 siblings, 1 reply; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29  7:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rafał Miłecki, Alan Jenkins, Hugh Dickens,
	H. Peter Anvin, Linux Kernel Mailing List

Ingo Molnar wrote:
> * Rafał Miłecki <zajec5@gmail.com> wrote:
>
>   
>> 2008/8/28 Jeremy Fitzhardinge <jeremy@goop.org>:
>>     
>>> Some BIOSes have been observed to corrupt memory in the low 64k.  This
>>> patch does two things:
>>>  - Reserves all memory which does not have to be in that area, to
>>>   prevent it from being used as general memory by the kernel.  Things
>>>   like the SMP trampoline are still in the memory, however.
>>>  - Clears the reserved memory so we can observe changes to it.
>>>  - Adds a function check_for_bios_corruption() which checks and reports on
>>>   memory becoming unexpectedly non-zero.  Currently it's called in the
>>>   x86 fault handler, and the powermanagement debug output.
>>>
>>> RFC: What other places should we check for corruption in?
>>>
>>> [ Alan, Rafał: could you check you see:
>>>   1: corruption messages
>>>   2: no crashes
>>>  Thanks -J
>>> ]
>>>       
>> I was trying my best to crash system with this patch applied and failed :)
>>
>> Works great.
>>
>> Just wonder if I should expect any printk from
>> check_for_bios_corruption? I do not see any:
>>
>> zajec@sony:~> dmesg | grep -i corr
>> scanning 2 areas for BIOS corruption
>>     
>
> that's _very_ weird.
>   

No, it's expected.  Rafał only got corruption when plugging his HDMI
cable, and I didn't put any corruption checks on that path (I'm not even
sure what kernel code would get executed in that case).  Hugh's original
patch put a check in the hot path of the fault handler - and so it would
get called regularly - but I put it in the kernel-bug path, which is
fairly pointless given that we expect this patch to prevent the crashes.

It does, however, do the check in the pm state changes, so doing a
suspend should make it print some of the corruption it found.  Alan's
case would be a better test for that though.

It does raise the question of where the good places to put the check
are.  It shouldn't be too hot, given that it's scanning ~64k of memory,
but often enough to actually show something.  I was thinking of putting
some calls in the acpi code itself, but got, erm, discouraged.

Maybe hooking into a sysrq key would be useful (sysrq-m?).

> maybe the BIOS expects _zeroes_ somewhere? Do you suddenly see crashes 
> if you change this line in Jeremy's patch:
>
> +               memset(__va(addr), 0, size);
>
> to something like:
>
> +               memset(__va(addr), 0x55, size);
>
> If this does not tickle any messages either, then maybe the problem is 
> in the identity of the entities we allocate in the first 64K. Is there a 
> list of allocations that go there when Jeremy's patch is not applied?
>
> but ... i think with an earlier patch you saw corruption, right? 
> Far-fetched idea: maybe it's some CPU erratum during suspend/resume that 
> corrupts pagetables if the pagetables are allocated in the first 64K of 
> RAM? In that case we should use a bootmem allocation for pagetables that 
> give a minimum address of 64K.
>   

Rafał's corruption was definitely non-zero.  I think the corruption is
happening, but it's just not reported.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  6:20 ` Rafał Miłecki
  2008-08-29  6:45   ` Ingo Molnar
@ 2008-08-29  7:22   ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29  7:22 UTC (permalink / raw)
  To: Rafał Miłecki
  Cc: Ingo Molnar, Alan Jenkins, Hugh Dickens, H. Peter Anvin,
	Linux Kernel Mailing List

Rafał Miłecki wrote:
> I was trying my best to crash system with this patch applied and failed :)
>
> Works great.
>   

Good.

> Just wonder if I should expect any printk from
> check_for_bios_corruption? I do not see any:
>
> zajec@sony:~> dmesg | grep -i corr
> scanning 2 areas for BIOS corruption
>   

I just followed up in detail to Ingo, but I think it's because I didn't
put any checks where you would hit them.  If you suspend/resume it
should print something.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  7:21     ` Jeremy Fitzhardinge
@ 2008-08-29  7:30       ` Ingo Molnar
  2008-08-29  8:02         ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 35+ messages in thread
From: Ingo Molnar @ 2008-08-29  7:30 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Rafał Miłecki, Alan Jenkins, Hugh Dickens,
	H. Peter Anvin, Linux Kernel Mailing List


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Rafał's corruption was definitely non-zero.  I think the corruption is 
> happening, but it's just not reported.

i thought we did a check after coming back from resume? That would be a 
natural place to do the check. (the corruption is suspend/resume related 
after all, right?)

	Ingo

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  7:30       ` Ingo Molnar
@ 2008-08-29  8:02         ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29  8:02 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rafał Miłecki, Alan Jenkins, Hugh Dickens,
	H. Peter Anvin, Linux Kernel Mailing List

Ingo Molnar wrote:
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>   
>> Rafał's corruption was definitely non-zero.  I think the corruption is 
>> happening, but it's just not reported.
>>     
>
> i thought we did a check after coming back from resume? That would be a 
> natural place to do the check. (the corruption is suspend/resume related 
> after all, right?)

No, in Rafał's case it happens when he unplugs an HDMI cable.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-28 19:52 [PATCH RFC] x86: check for and defend against BIOS memory corruption Jeremy Fitzhardinge
  2008-08-29  1:49 ` Yinghai Lu
  2008-08-29  6:20 ` Rafał Miłecki
@ 2008-08-29  8:14 ` Hugh Dickins
  2008-08-29 14:48   ` Jeremy Fitzhardinge
                     ` (2 more replies)
  2 siblings, 3 replies; 35+ messages in thread
From: Hugh Dickins @ 2008-08-29  8:14 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Rafał Miłecki, Alan Jenkins,
	H. Peter Anvin, Linux Kernel Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 10071 bytes --]

Thanks for taking this on, Jeremy: I had too many doubts to do so.

On Thu, 28 Aug 2008, Jeremy Fitzhardinge wrote:
> Some BIOSes have been observed to corrupt memory in the low 64k.

hpa introduced the 64k idea, and we've all been repeating it;
but I've not heard the reasoning behind it.  Is it a fundamental
addressing limitation within the BIOS memory model?  Or a case
that Windows treats the bottom 64k as scratch, so BIOS testers
won't notice if they corrupt it?

The two instances of corruption we've been studying have indeed
been below 64k (one in page 8 and one in page 11), but that's
because they were both recognizable corruptions of direct map PMDs.

If there is not a very strong justification for that 64k limit,
then I don't think this approach will be very useful, and we should
simply continue to rely on analyzing corruption when it appears, and
recommend memmap= as a way of avoiding it once analyzed.  If there
is a strong justification for it, please dispel my ignorance!

> This patch does two things:
>  - Reserves all memory which does not have to be in that area, to
>    prevent it from being used as general memory by the kernel.  Things
>    like the SMP trampoline are still in the memory, however.
>  - Clears the reserved memory so we can observe changes to it.
>  - Adds a function check_for_bios_corruption() which checks and reports on
>    memory becoming unexpectedly non-zero.  Currently it's called in the
>    x86 fault handler, and the powermanagement debug output.
> 
> RFC: What other places should we check for corruption in?

I don't know: the easy answer would be just to do it once every minute.
As the patch stands, we'll only learn more on machines going through
suspend+resume (disk or ram).  If the corruption avoidance works,
then the fault case should never trigger.

> 
> [ Alan, Rafał: could you check you see:
>    1: corruption messages
>    2: no crashes
>   Thanks -J
> ]
> 
> Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
> Cc: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
> Cc: Hugh Dickens <hugh@veritas.com>

           Dickins

> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Rafael J. Wysocki <rjw@sisk.pl>
> Cc: Rafał Miłecki <zajec5@gmail.com>
> Cc: H. Peter Anvin <hpa@zytor.com>
> ---
>  Documentation/kernel-parameters.txt |    5 ++
>  arch/x86/Kconfig                    |    3 +
>  arch/x86/kernel/setup.c             |   86 +++++++++++++++++++++++++++++++++++
>  arch/x86/mm/fault.c                 |    2 
>  drivers/base/power/main.c           |    1 
>  include/linux/kernel.h              |   12 ++++
>  6 files changed, 109 insertions(+)
> 
> ===================================================================
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -359,6 +359,11 @@
>  			BayCom Serial Port AX.25 Modem (Half Duplex Mode)
>  			Format: <io>,<irq>,<mode>
>  			See header of drivers/net/hamradio/baycom_ser_hdx.c.
> +
> +	bios_corruption_check=0/1 [X86]
> +			Some BIOSes seem to corrupt the first 64k of memory
> +			when doing things like suspend/resume.  Setting this
> +			option will scan the memory looking for corruption.

It's actually a bottom_of_memory corruption_check: it would pick
up corruption there whether it's caused by the BIOS or not.
The boot parameter description ought to refer to the config
option: ah, the config option is always on for x86, hmm.

If the 64k is in any doubt, maybe the corruption_check boot option
should specify limiting address rather than just off/on.

>  
>  	boot_delay=	Milliseconds to delay each printk during boot.
>  			Values larger than 10 seconds (10000) are changed to
> ===================================================================
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -203,6 +203,9 @@
>  	bool
>  	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
>  	default y
> +
> +config X86_CHECK_BIOS_CORRUPTION
> +        def_bool y

Always on?  For the moment, perhaps.  I'm very pleased to see you've
set it on for x86_32 as well as for x86_64, I'd been wanting to ask
what's been happening on 32-bit.

>  
>  config KTIME_SCALAR
>  	def_bool X86_32
> ===================================================================
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -582,6 +582,88 @@
>  struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
>  
>  /*
> + * Some BIOSes seem to corrupt the low 64k of memory during events
> + * like suspend/resume and unplugging an HDMI cable.  Reserve all
> + * remaining free memory in that area and fill it with a distinct
> + * pattern.
> + */
> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
> +#define MAX_SCAN_AREAS	8
> +static struct e820entry scan_areas[MAX_SCAN_AREAS];
> +static int num_scan_areas;
> +
> +static void __init setup_bios_corruption_check(void)
> +{
> +	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
> +
> +	while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
> +		u64 size;
> +		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
> +
> +		if (addr == 0)
> +			break;
> +
> +		if ((addr + size) > 0x10000)
> +			size = 0x10000 - addr;

Here (and the patch description) might be the right place to justify 64k.

> +
> +		if (size == 0)
> +			break;
> +
> +		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
> +		scan_areas[num_scan_areas].addr = addr;
> +		scan_areas[num_scan_areas].size = size;
> +		num_scan_areas++;
> +
> +		/* Assume we've already mapped this early memory */
> +		memset(__va(addr), 0, size);
> +
> +		addr += size;
> +	}
> +
> +	printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
> +	       num_scan_areas);
> +	update_e820();
> +}
> +
> +static int __read_mostly bios_corruption_check = 1;
> +
> +void check_for_bios_corruption(void)
> +{
> +	int i;
> +	int corruption = 0;
> +
> +	if (!bios_corruption_check)
> +		return;
> +
> +	for(i = 0; i < num_scan_areas; i++) {
> +		unsigned long *addr = __va(scan_areas[i].addr);

Small point, but since we're doing the same on 32-bit and 64-bit,
I think it would be better to operate on unsigned ints, to get
the same kind of printout in the two cases.

> +		unsigned long size = scan_areas[i].size;
> +
> +		for(; size; addr++, size--) {
> +			if (!*addr)
> +				continue;
> +			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
> +			       addr, __pa(addr), *addr);

Don't you need to reset *addr to 0 here, to avoid noise ever after?

> +			corruption = 1;
> +		}
> +	}
> +
> +	if (corruption)
> +		dump_stack();

The purpose of the dump_stack being to insert a recognizable and
irritating spew into the log, prompting people to report these cases:
the stacktrace itself is unlikely to be relevant.  Is a simple
dump_stack enough to get it reported to kerneloops.org, or is
more tweaking necessary?

> +}
> +
> +static int set_bios_corruption_check(char *arg)
> +{
> +	char *end;
> +
> +	bios_corruption_check = simple_strtol(arg, &end, 10);
> +
> +	return (*end == 0) ? 0 : -EINVAL;
> +}
> +early_param("bios_corruption_check", set_bios_corruption_check);
> +#endif
> +
> +/*
>   * Determine if we were loaded by an EFI loader.  If so, then we have also been
>   * passed the efi memmap, systab, etc., so we should use these data structures
>   * for initialization.  Note, the efi init code path is determined by the
> @@ -766,6 +848,10 @@
>  		max_low_pfn = max_pfn;
>  
>  	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +#endif
> +
> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
> +	setup_bios_corruption_check();
>  #endif
>  
>  	/* max_pfn_mapped is updated here */
> ===================================================================
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -864,6 +864,8 @@
>   * Oops. The kernel tried to access some bad page. We'll have to
>   * terminate things with extreme prejudice.
>   */
> +	check_for_bios_corruption();
> +
>  #ifdef CONFIG_X86_32
>  	bust_spinlocks(1);
>  #else
> ===================================================================
> --- a/drivers/base/power/main.c
> +++ b/drivers/base/power/main.c
> @@ -254,6 +254,7 @@
>  
>  static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
>  {
> +	check_for_bios_corruption();
>  	dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
>  		((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
>  		", may wakeup" : "");
> ===================================================================
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -246,6 +246,18 @@
>  extern void add_taint(unsigned);
>  extern int root_mountflags;
>  
> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
> +/*
> + * This is obviously not a great place for this, but we want to be
> + * able to scatter it around anywhere in the kernel.
> + */
> +void check_for_bios_corruption(void);
> +#else
> +static inline void check_for_bios_corruption(void)
> +{
> +}
> +#endif
> +
>  /* Values used for system_state */
>  extern enum system_states {
>  	SYSTEM_BOOTING,

I'll give this or something like it a try on my machines later on,
32 and 64, to see if anything comes up which hasn't caused any
visible problem before (but will either need to do suspend+resume
on machines not tried before, or add in the periodic test).

I do wonder whether for 2.6.27 we should simply go back to the
previous kernel pagetable layout, so there's no regression while
we investigate the issue further.  But I don't recall how well-
defined that layout was, and whether your perturbation was just
in that one patch or not.

Is this the right moment for me to mention again that I'm not sure
your reuse of existing pagetables was quite right anyway: NX being
excluded from level2_ident_pgt, but wanted in the direct map?

Hugh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  3:28   ` Jeremy Fitzhardinge
@ 2008-08-29  9:25     ` Alan Cox
  2008-08-29 10:13       ` Rafał Miłecki
  2008-08-29 14:18       ` Jeremy Fitzhardinge
  2008-08-29 20:31     ` Kasper Sandberg
  1 sibling, 2 replies; 35+ messages in thread
From: Alan Cox @ 2008-08-29  9:25 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Yinghai Lu, Ingo Molnar, Rafał Miłecki, Alan Jenkins,
	Hugh Dickens, H. Peter Anvin, Linux Kernel Mailing List

> >>  - Clears the reserved memory so we can observe changes to it.

You ought to pick different values on different runs 0x00, 0xFF, 0xAA,
0x55 etc...

> Yeah, OK, but I think it should default to ON for now.  The problem is
> that we had two very different systems (Sony Vaio and Intel desktop)

So a zillion people should lose a chunk of RAM because of what is
probably an obscure bug in a single version of a piece of SMM code on two
systems total ?

That appears to be totally out of proportion - plus the defaults are
*irrelevant* for mass user coverage. If you want large scale coverage ask
the Fedora, OpenSuSE and Ubuntu people to turn it on for a testing kernel
release and report back.

Alan

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 10:13       ` Rafał Miłecki
@ 2008-08-29 10:06         ` Alan Cox
  2008-08-29 10:24         ` Hugh Dickins
  1 sibling, 0 replies; 35+ messages in thread
From: Alan Cox @ 2008-08-29 10:06 UTC (permalink / raw)
  To: Rafał Miłecki
  Cc: Jeremy Fitzhardinge, Yinghai Lu, Ingo Molnar, Alan Jenkins,
	Hugh Dickens, H. Peter Anvin, Linux Kernel Mailing List

> I understand that disabling chunk of RAM on every, even not affected,
> machine is far from perfect, but we have to remember about not
> advanced end-users.

Including not advanced end users without the bug ... which is probably
all but one or two people.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  9:25     ` Alan Cox
@ 2008-08-29 10:13       ` Rafał Miłecki
  2008-08-29 10:06         ` Alan Cox
  2008-08-29 10:24         ` Hugh Dickins
  2008-08-29 14:18       ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 35+ messages in thread
From: Rafał Miłecki @ 2008-08-29 10:13 UTC (permalink / raw)
  To: Alan Cox
  Cc: Jeremy Fitzhardinge, Yinghai Lu, Ingo Molnar, Alan Jenkins,
	Hugh Dickens, H. Peter Anvin, Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 992 bytes --]

Out of current discussion I tried using s2ram on patched kernel (I didnot try s2ram earlier, my problem was (un)plugging HDMI - some ACPIcode probably).
Corruption output is quite huge, I attached it to bug report:http://bugzilla.kernel.org/show_bug.cgi?id=11237http://bugzilla.kernel.org/attachment.cgi?id=17526

I do not know much about current discussion and fact if checking forcorruption should be enabled by default.
However, please note that end-user will not able to diagnose thatLinux doesn't work on his machine because of less-or-more broken BIOS.He will not get to know that he needs to enablebios_corruption_check=1 and edit grub to make it default.
I understand that disabling chunk of RAM on every, even not affected,machine is far from perfect, but we have to remember about notadvanced end-users.
-- RafaÅ‚ MiÅ‚eckiÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 10:13       ` Rafał Miłecki
  2008-08-29 10:06         ` Alan Cox
@ 2008-08-29 10:24         ` Hugh Dickins
  2008-08-29 11:54           ` Rafał Miłecki
  2008-08-29 14:08           ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 35+ messages in thread
From: Hugh Dickins @ 2008-08-29 10:24 UTC (permalink / raw)
  To: Rafał Miłecki
  Cc: Alan Cox, Jeremy Fitzhardinge, Yinghai Lu, Ingo Molnar,
	Alan Jenkins, H. Peter Anvin, Linux Kernel Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1203 bytes --]

> On Fri, 29 Aug 2008, Rafał Miłecki wrote:
> 
> Out of current discussion I tried using s2ram on patched kernel (I did
> not try s2ram earlier, my problem was (un)plugging HDMI - some ACPI
> code probably).
> 
> Corruption output is quite huge, I attached it to bug report:
> http://bugzilla.kernel.org/show_bug.cgi?id=11237
> http://bugzilla.kernel.org/attachment.cgi?id=17526

Not quite the output we were expecting!  I've not got around to trying
it yet, so beware, but I think Jeremy's patch needs the following on top.
Or you may prefer to wait until one of us reports that it is now working
as intended.

--- a/arch/x86/kernel/setup.c	2008-08-29 11:17:16.000000000 +0100
+++ b/arch/x86/kernel/setup.c	2008-08-29 11:19:24.000000000 +0100
@@ -636,11 +636,12 @@ void check_for_bios_corruption(void)
 		unsigned long *addr = __va(scan_areas[i].addr);
 		unsigned long size = scan_areas[i].size;
 
-		for(; size; addr++, size--) {
+		for(; size; addr++, size -= sizeof(unsigned long)) {
 			if (!*addr)
 				continue;
 			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
 			       addr, __pa(addr), *addr);
+			*addr = 0;
 			corruption = 1;
 		}
 	}

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 10:24         ` Hugh Dickins
@ 2008-08-29 11:54           ` Rafał Miłecki
  2008-08-29 12:09             ` Alan Jenkins
  2008-08-29 14:08           ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 35+ messages in thread
From: Rafał Miłecki @ 2008-08-29 11:54 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Alan Cox, Jeremy Fitzhardinge, Yinghai Lu, Ingo Molnar,
	Alan Jenkins, H. Peter Anvin, Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 1815 bytes --]

2008/8/29 Hugh Dickins <hugh@veritas.com>:>> On Fri, 29 Aug 2008, Rafał Miłecki wrote:>>>> Out of current discussion I tried using s2ram on patched kernel (I did>> not try s2ram earlier, my problem was (un)plugging HDMI - some ACPI>> code probably).>>>> Corruption output is quite huge, I attached it to bug report:>> http://bugzilla.kernel.org/show_bug.cgi?id=11237>> http://bugzilla.kernel.org/attachment.cgi?id=17526>> Not quite the output we were expecting!  I've not got around to trying> it yet, so beware, but I think Jeremy's patch needs the following on top.> Or you may prefer to wait until one of us reports that it is now working> as intended.>> --- a/arch/x86/kernel/setup.c   2008-08-29 11:17:16.000000000 +0100> +++ b/arch/x86/kernel/setup.c   2008-08-29 11:19:24.000000000 +0100> @@ -636,11 +636,12 @@ void check_for_bios_corruption(void)>                unsigned long *addr = __va(scan_areas[i].addr);>                unsigned long size = scan_areas[i].size;>> -               for(; size; addr++, size--) {> +               for(; size; addr++, size -= sizeof(unsigned long)) {>                        if (!*addr)>                                continue;>                        printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",>                               addr, __pa(addr), *addr);> +                       *addr = 0;>                        corruption = 1;>                }>        }
I tried your patch anyway (after applying Jeremy's patch of course)and it doesn't seem to work. The only output is:scanning 2 areas for BIOS corruptionafter using s2ram. I do not get anyCorrupted low memory at*
-- RafaÅ‚ MiÅ‚eckiÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 11:54           ` Rafał Miłecki
@ 2008-08-29 12:09             ` Alan Jenkins
  2008-08-29 13:21               ` Hugh Dickins
  0 siblings, 1 reply; 35+ messages in thread
From: Alan Jenkins @ 2008-08-29 12:09 UTC (permalink / raw)
  To: Rafał Miłecki
  Cc: Hugh Dickins, Alan Cox, Jeremy Fitzhardinge, Yinghai Lu,
	Ingo Molnar, H. Peter Anvin, Linux Kernel Mailing List

Rafał Miłecki wrote:
> 2008/8/29 Hugh Dickins <hugh@veritas.com>:
>   
>>> On Fri, 29 Aug 2008, Rafał Miłecki wrote:
>>>
>>> Out of current discussion I tried using s2ram on patched kernel (I did
>>> not try s2ram earlier, my problem was (un)plugging HDMI - some ACPI
>>> code probably).
>>>
>>> Corruption output is quite huge, I attached it to bug report:
>>> http://bugzilla.kernel.org/show_bug.cgi?id=11237
>>> http://bugzilla.kernel.org/attachment.cgi?id=17526
>>>       
>> Not quite the output we were expecting!  I've not got around to trying
>> it yet, so beware, but I think Jeremy's patch needs the following on top.
>> Or you may prefer to wait until one of us reports that it is now working
>> as intended.
>>
>> --- a/arch/x86/kernel/setup.c   2008-08-29 11:17:16.000000000 +0100
>> +++ b/arch/x86/kernel/setup.c   2008-08-29 11:19:24.000000000 +0100
>> @@ -636,11 +636,12 @@ void check_for_bios_corruption(void)
>>                unsigned long *addr = __va(scan_areas[i].addr);
>>                unsigned long size = scan_areas[i].size;
>>
>> -               for(; size; addr++, size--) {
>> +               for(; size; addr++, size -= sizeof(unsigned long)) {
>>                        if (!*addr)
>>                                continue;
>>                        printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
>>                               addr, __pa(addr), *addr);
>> +                       *addr = 0;
>>                        corruption = 1;
>>                }
>>        }
>>     
>
> I tried your patch anyway (after applying Jeremy's patch of course)
> and it doesn't seem to work. The only output is:
> scanning 2 areas for BIOS corruption
> after using s2ram. I do not get any
> Corrupted low memory at*
>   
It seemed to work for me.  Did you remember to plug HDMI to trigger the
corruption before you used s2ram?

[   71.663828] Back to C!
[   71.663828] Corrupted low memory at ffff8800000083e8 (83e8 phys) =
803c85370cfc0000
[   71.663828] Corrupted low memory at ffff8800000083f0 (83f0 phys) =
00003000
[   71.663828] Pid: 7335, comm: pm-suspend Not tainted
2.6.27-rc4-00216-ge1c9c9d-dirty #152
[   71.663828]
[   71.663828] Call Trace:
[   71.663828]  [<ffffffff8020fdd5>] check_for_bios_corruption+0xb5/0xd0
[   71.663828]  [<ffffffff803a3e1a>] pm_dev_dbg+0x2a/0xa0
[   71.663828]  [<ffffffff803a49c2>] dpm_power_up+0x32/0xf0

Alan

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 12:09             ` Alan Jenkins
@ 2008-08-29 13:21               ` Hugh Dickins
  2008-08-29 16:30                 ` Rafał Miłecki
  2008-08-29 17:39                 ` Rafał Miłecki
  0 siblings, 2 replies; 35+ messages in thread
From: Hugh Dickins @ 2008-08-29 13:21 UTC (permalink / raw)
  To: Alan Jenkins
  Cc: Rafał Miłecki, Alan Cox, Jeremy Fitzhardinge,
	Yinghai Lu, Ingo Molnar, H. Peter Anvin,
	Linux Kernel Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 7337 bytes --]

On Fri, 29 Aug 2008, Alan Jenkins wrote:
> Rafał Miłecki wrote:
> >
> > I tried your patch anyway (after applying Jeremy's patch of course)
> > and it doesn't seem to work. The only output is:
> > scanning 2 areas for BIOS corruption
> > after using s2ram. I do not get any
> > Corrupted low memory at*
> >   
> It seemed to work for me.  Did you remember to plug HDMI to trigger the
> corruption before you used s2ram?

I hope that's what got missed.

Here's my version of Jeremy's patch, that I've now tested on my machines,
as x86_32 and as x86_64.  It addresses none of the points Alan Cox made,
and it stays silent for me, even after suspend+resume, unless I actually
introduce corruption myself.  Omits Jeremy's check in fault.c, but does
a check every minute, so should soon detect Rafał's HDMI corruption
without any need to suspend+resume.

Hugh

--- 2.6.27-rc5/Documentation/kernel-parameters.txt	2008-08-29 01:02:34.000000000 +0100
+++ linux/Documentation/kernel-parameters.txt	2008-08-29 11:17:16.000000000 +0100
@@ -360,6 +360,11 @@ and is between 256 and 4096 characters. 
 			Format: <io>,<irq>,<mode>
 			See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
+	bios_corruption_check=0/1 [X86]
+			Some BIOSes seem to corrupt the first 64k of memory
+			when doing things like suspend/resume.  Setting this
+			option will scan the memory looking for corruption.
+
 	boot_delay=	Milliseconds to delay each printk during boot.
 			Values larger than 10 seconds (10000) are changed to
 			no delay (0).
--- 2.6.27-rc5/arch/x86/Kconfig	2008-08-29 01:02:35.000000000 +0100
+++ linux/arch/x86/Kconfig	2008-08-29 11:17:16.000000000 +0100
@@ -201,6 +201,9 @@ config X86_TRAMPOLINE
 	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
 
+config X86_CHECK_BIOS_CORRUPTION
+        def_bool y
+
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
--- 2.6.27-rc5/arch/x86/kernel/setup.c	2008-08-29 01:02:35.000000000 +0100
+++ linux/arch/x86/kernel/setup.c	2008-08-29 13:50:19.000000000 +0100
@@ -579,6 +579,106 @@ static struct x86_quirks default_x86_qui
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
 /*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable.  Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+#define MAX_SCAN_AREAS	8
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+static void __init setup_bios_corruption_check(void)
+{
+	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
+
+	while (addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
+		u64 size;
+		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+		if (addr == 0)
+			break;
+
+		if ((addr + size) > 0x10000)
+			size = 0x10000 - addr;
+
+		if (size == 0)
+			break;
+
+		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+		scan_areas[num_scan_areas].addr = addr;
+		scan_areas[num_scan_areas].size = size;
+		num_scan_areas++;
+
+		/* Assume we've already mapped this early memory */
+		memset(__va(addr), 0, size);
+
+		addr += size;
+	}
+
+	printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
+	       num_scan_areas);
+	update_e820();
+}
+
+static int __read_mostly bios_corruption_check = 1;
+static struct timer_list periodic_check_timer;
+
+void check_for_bios_corruption(void)
+{
+	int i;
+	int corruption = 0;
+
+	if (!bios_corruption_check)
+		return;
+
+	for (i = 0; i < num_scan_areas; i++) {
+		unsigned int *addr = __va(scan_areas[i].addr);
+		unsigned long size = scan_areas[i].size;
+
+		for (; size; addr++, size -= sizeof(unsigned int)) {
+			if (!*addr)
+				continue;
+			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08x\n",
+			       addr, __pa(addr), *addr);
+			*addr = 0;
+			corruption = 1;
+		}
+	}
+
+	if (corruption)
+		dump_stack();
+}
+
+static void periodic_check_for_corruption(unsigned long data)
+{
+	check_for_bios_corruption();
+	mod_timer(&periodic_check_timer, jiffies + 60*HZ);
+}
+
+void start_periodic_check_for_corruption(void)
+{
+	if (!bios_corruption_check)
+		return;
+
+	init_timer(&periodic_check_timer);
+	periodic_check_timer.function = &periodic_check_for_corruption;
+	periodic_check_for_corruption(0);
+}
+
+static int set_bios_corruption_check(char *arg)
+{
+	char *end;
+
+	bios_corruption_check = simple_strtol(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("bios_corruption_check", set_bios_corruption_check);
+#endif
+
+/*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
  * for initialization.  Note, the efi init code path is determined by the
@@ -750,6 +850,10 @@ void __init setup_arch(char **cmdline_p)
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+	setup_bios_corruption_check();
+#endif
+
 	/* max_pfn_mapped is updated here */
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
--- 2.6.27-rc5/arch/x86/mm/init_32.c	2008-07-29 04:24:15.000000000 +0100
+++ linux/arch/x86/mm/init_32.c	2008-08-29 11:52:21.000000000 +0100
@@ -907,6 +907,8 @@ void __init mem_init(void)
 	int codesize, reservedpages, datasize, initsize;
 	int tmp;
 
+	start_periodic_check_for_corruption();
+
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
--- 2.6.27-rc5/arch/x86/mm/init_64.c	2008-08-29 01:02:35.000000000 +0100
+++ linux/arch/x86/mm/init_64.c	2008-08-29 11:53:09.000000000 +0100
@@ -769,6 +769,8 @@ void __init mem_init(void)
 {
 	long codesize, reservedpages, datasize, initsize;
 
+	start_periodic_check_for_corruption();
+
 	pci_iommu_alloc();
 
 	/* clear_bss() already clear the empty_zero_page */
--- 2.6.27-rc5/drivers/base/power/main.c	2008-08-29 01:02:35.000000000 +0100
+++ linux/drivers/base/power/main.c	2008-08-29 11:17:16.000000000 +0100
@@ -254,6 +254,7 @@ static char *pm_verb(int event)
 
 static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
 {
+	check_for_bios_corruption();
 	dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
 		((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
 		", may wakeup" : "");
--- 2.6.27-rc5/include/linux/kernel.h	2008-08-13 04:14:50.000000000 +0100
+++ linux/include/linux/kernel.h	2008-08-29 11:49:23.000000000 +0100
@@ -240,6 +240,22 @@ extern const char *print_tainted(void);
 extern void add_taint(unsigned);
 extern int root_mountflags;
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+/*
+ * This is obviously not a great place for this, but we want to be
+ * able to scatter it around anywhere in the kernel.
+ */
+void check_for_bios_corruption(void);
+void start_periodic_check_for_corruption(void);
+#else
+static inline void check_for_bios_corruption(void)
+{
+}
+static inline void start_periodic_check_for_corruption(void)
+{
+}
+#endif
+
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 10:24         ` Hugh Dickins
  2008-08-29 11:54           ` Rafał Miłecki
@ 2008-08-29 14:08           ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29 14:08 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Rafał Miłecki, Alan Cox, Yinghai Lu, Ingo Molnar,
	Alan Jenkins, H. Peter Anvin, Linux Kernel Mailing List

Hugh Dickins wrote:
> Not quite the output we were expecting!  I've not got around to trying
> it yet, so beware, but I think Jeremy's patch needs the following on top.
> Or you may prefer to wait until one of us reports that it is now working
> as intended.
>
> --- a/arch/x86/kernel/setup.c	2008-08-29 11:17:16.000000000 +0100
> +++ b/arch/x86/kernel/setup.c	2008-08-29 11:19:24.000000000 +0100
> @@ -636,11 +636,12 @@ void check_for_bios_corruption(void)
>  		unsigned long *addr = __va(scan_areas[i].addr);
>  		unsigned long size = scan_areas[i].size;
>  
> -		for(; size; addr++, size--) {
> +		for(; size; addr++, size -= sizeof(unsigned long)) {
>   

Oops, yes.

>  			if (!*addr)
>  				continue;
>  			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
>  			       addr, __pa(addr), *addr);
> +			*addr = 0;
>   

Good idea.

>  			corruption = 1;
>  		}
>  	}


    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  9:25     ` Alan Cox
  2008-08-29 10:13       ` Rafał Miłecki
@ 2008-08-29 14:18       ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29 14:18 UTC (permalink / raw)
  To: Alan Cox
  Cc: Yinghai Lu, Ingo Molnar, Rafa? Mi?ecki, Alan Jenkins,
	Hugh Dickens, H. Peter Anvin, Linux Kernel Mailing List

Alan Cox wrote:
>>>>  - Clears the reserved memory so we can observe changes to it.
>>>>         
>
> You ought to pick different values on different runs 0x00, 0xFF, 0xAA,
> 0x55 etc...
>   

Yes, that wouldn't be a bad idea.  The corruption we've seen so far is
0->non 0, but we haven't looked for zeroing.

>> Yeah, OK, but I think it should default to ON for now.  The problem is
>> that we had two very different systems (Sony Vaio and Intel desktop)
>>     
>
> So a zillion people should lose a chunk of RAM because of what is
> probably an obscure bug in a single version of a piece of SMM code on two
> systems total ?
>   

Well, we just don't know.  We have two systems which started reporting
problems when I made a small change to how the initial pagetables are
allocated.  Before that they appeared to work fine, though in reality it
was just some other part of the address space being corrupted.  Who
knows how many systems are out there "working fine", except that some
obscure address in the user address space now allows you to write into
kernel memory?

The two systems are *completely different*.  One has a Phoenix BIOS, the
other is AMI.  One is a Sony Vaio, the other is an OEM Intel desktop. 
One corrupts memory on unplugging a HDMI cable, the other corrupts on
suspend/resume.  I don't think this is reassuring, or indicates the
problem has limited scope.

> That appears to be totally out of proportion - plus the defaults are
> *irrelevant* for mass user coverage. If you want large scale coverage ask
> the Fedora, OpenSuSE and Ubuntu people to turn it on for a testing kernel
> release and report back.

The idea is to leave it on by default for a while - maybe a couple of
-rc releases - and see what turns up.  We can turn it off for a release
kernel if people really object, though I'd hope something like Fedora
Rawhide would leave it on.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  8:14 ` Hugh Dickins
@ 2008-08-29 14:48   ` Jeremy Fitzhardinge
  2008-08-29 17:20     ` H. Peter Anvin
  2008-09-08 11:35     ` Hugh Dickins
  2008-08-29 17:02   ` H. Peter Anvin
  2008-08-29 17:03   ` H. Peter Anvin
  2 siblings, 2 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-29 14:48 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Ingo Molnar, Rafał Miłecki, Alan Jenkins,
	H. Peter Anvin, Linux Kernel Mailing List

Hugh Dickins wrote:
> Thanks for taking this on, Jeremy: I had too many doubts to do so.
>
> On Thu, 28 Aug 2008, Jeremy Fitzhardinge wrote:
>   
>> Some BIOSes have been observed to corrupt memory in the low 64k.
>>     
>
> hpa introduced the 64k idea, and we've all been repeating it;
> but I've not heard the reasoning behind it.  Is it a fundamental
> addressing limitation within the BIOS memory model?  Or a case
> that Windows treats the bottom 64k as scratch, so BIOS testers
> won't notice if they corrupt it?
>
> The two instances of corruption we've been studying have indeed
> been below 64k (one in page 8 and one in page 11), but that's
> because they were both recognizable corruptions of direct map PMDs.
>
> If there is not a very strong justification for that 64k limit,
> then I don't think this approach will be very useful, and we should
> simply continue to rely on analyzing corruption when it appears, and
> recommend memmap= as a way of avoiding it once analyzed.  If there
> is a strong justification for it, please dispel my ignorance!
>   

I don't think there's been a strong rationale.  64k is significant
because it's the size of an old 16-bit real-mode segment, and perhaps
the bios code in question is 16-bit code.

But I think it's really a question of limiting the scope of the problem:
yes, the BIOS could corrupt any memory anywhere, but what can we
possibly do about that?  Such a machine is as useless as a system with
intermittent faulty memory hardware.

We have to assume that it is basically possible to use the machine, and
that presumably whatever bug the BIOS has isn't making Windows crash. 
Windows might be avoiding the low 64k for some ancient-history DOS
reason, or perhaps it's just full of boot code which isn't used again?

It would be easy to add another kernel parameter to select how much
memory to reserve for corruption checking.  64k?  1M?

>> This patch does two things:
>>  - Reserves all memory which does not have to be in that area, to
>>    prevent it from being used as general memory by the kernel.  Things
>>    like the SMP trampoline are still in the memory, however.
>>  - Clears the reserved memory so we can observe changes to it.
>>  - Adds a function check_for_bios_corruption() which checks and reports on
>>    memory becoming unexpectedly non-zero.  Currently it's called in the
>>    x86 fault handler, and the powermanagement debug output.
>>
>> RFC: What other places should we check for corruption in?
>>     
>
> I don't know: the easy answer would be just to do it once every minute.
> As the patch stands, we'll only learn more on machines going through
> suspend+resume (disk or ram).  If the corruption avoidance works,
> then the fault case should never trigger.
>   

Right.  Once a minute sounds good, though it might be a bit too long to
correlate the corruption to a specific event.

>> [ Alan, Rafał: could you check you see:
>>    1: corruption messages
>>    2: no crashes
>>   Thanks -J
>> ]
>>
>> Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
>> Cc: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
>> Cc: Hugh Dickens <hugh@veritas.com>
>>     
>
>            Dickins
>
>   
>> Cc: Ingo Molnar <mingo@elte.hu>
>> Cc: Rafael J. Wysocki <rjw@sisk.pl>
>> Cc: Rafał Miłecki <zajec5@gmail.com>
>> Cc: H. Peter Anvin <hpa@zytor.com>
>> ---
>>  Documentation/kernel-parameters.txt |    5 ++
>>  arch/x86/Kconfig                    |    3 +
>>  arch/x86/kernel/setup.c             |   86 +++++++++++++++++++++++++++++++++++
>>  arch/x86/mm/fault.c                 |    2 
>>  drivers/base/power/main.c           |    1 
>>  include/linux/kernel.h              |   12 ++++
>>  6 files changed, 109 insertions(+)
>>
>> ===================================================================
>> --- a/Documentation/kernel-parameters.txt
>> +++ b/Documentation/kernel-parameters.txt
>> @@ -359,6 +359,11 @@
>>  			BayCom Serial Port AX.25 Modem (Half Duplex Mode)
>>  			Format: <io>,<irq>,<mode>
>>  			See header of drivers/net/hamradio/baycom_ser_hdx.c.
>> +
>> +	bios_corruption_check=0/1 [X86]
>> +			Some BIOSes seem to corrupt the first 64k of memory
>> +			when doing things like suspend/resume.  Setting this
>> +			option will scan the memory looking for corruption.
>>     
>
> It's actually a bottom_of_memory corruption_check: it would pick
> up corruption there whether it's caused by the BIOS or not.
> The boot parameter description ought to refer to the config
> option: ah, the config option is always on for x86, hmm.
>   

Yeah, I didn't bother to make it switchable yet.

> If the 64k is in any doubt, maybe the corruption_check boot option
> should specify limiting address rather than just off/on.
>   

Yes, that sounds good.  Also, it might be worth being able to set the
rate.  Doing it once every 10 seconds wouldn't add too much overhead,
and once a second would be reasonable if you think there's an actual
problem.

>>  
>>  	boot_delay=	Milliseconds to delay each printk during boot.
>>  			Values larger than 10 seconds (10000) are changed to
>> ===================================================================
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -203,6 +203,9 @@
>>  	bool
>>  	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
>>  	default y
>> +
>> +config X86_CHECK_BIOS_CORRUPTION
>> +        def_bool y
>>     
>
> Always on?  For the moment, perhaps.  I'm very pleased to see you've
> set it on for x86_32 as well as for x86_64, I'd been wanting to ask
> what's been happening on 32-bit.
>   

I couldn't see any reason the problem would be restricted to 64-bit.  It
could just be lurking in a rarely used corner of the address space.

>>  
>>  config KTIME_SCALAR
>>  	def_bool X86_32
>> ===================================================================
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -582,6 +582,88 @@
>>  struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
>>  
>>  /*
>> + * Some BIOSes seem to corrupt the low 64k of memory during events
>> + * like suspend/resume and unplugging an HDMI cable.  Reserve all
>> + * remaining free memory in that area and fill it with a distinct
>> + * pattern.
>> + */
>> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
>> +#define MAX_SCAN_AREAS	8
>> +static struct e820entry scan_areas[MAX_SCAN_AREAS];
>> +static int num_scan_areas;
>> +
>> +static void __init setup_bios_corruption_check(void)
>> +{
>> +	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
>> +
>> +	while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
>> +		u64 size;
>> +		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
>> +
>> +		if (addr == 0)
>> +			break;
>> +
>> +		if ((addr + size) > 0x10000)
>> +			size = 0x10000 - addr;
>>     
>
> Here (and the patch description) might be the right place to justify 64k.
>   

I'm inclined to make it variable.

>> +
>> +		if (size == 0)
>> +			break;
>> +
>> +		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
>> +		scan_areas[num_scan_areas].addr = addr;
>> +		scan_areas[num_scan_areas].size = size;
>> +		num_scan_areas++;
>> +
>> +		/* Assume we've already mapped this early memory */
>> +		memset(__va(addr), 0, size);
>> +
>> +		addr += size;
>> +	}
>> +
>> +	printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
>> +	       num_scan_areas);
>> +	update_e820();
>> +}
>> +
>> +static int __read_mostly bios_corruption_check = 1;
>> +
>> +void check_for_bios_corruption(void)
>> +{
>> +	int i;
>> +	int corruption = 0;
>> +
>> +	if (!bios_corruption_check)
>> +		return;
>> +
>> +	for(i = 0; i < num_scan_areas; i++) {
>> +		unsigned long *addr = __va(scan_areas[i].addr);
>>     
>
> Small point, but since we're doing the same on 32-bit and 64-bit,
> I think it would be better to operate on unsigned ints, to get
> the same kind of printout in the two cases.
>   

OK.

>> +		unsigned long size = scan_areas[i].size;
>> +
>> +		for(; size; addr++, size--) {
>> +			if (!*addr)
>> +				continue;
>> +			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
>> +			       addr, __pa(addr), *addr);
>>     
>
> Don't you need to reset *addr to 0 here, to avoid noise ever after?
>   

Yes.

>> +			corruption = 1;
>> +		}
>> +	}
>> +
>> +	if (corruption)
>> +		dump_stack();
>>     
>
> The purpose of the dump_stack being to insert a recognizable and
> irritating spew into the log, prompting people to report these cases:
> the stacktrace itself is unlikely to be relevant.  Is a simple
> dump_stack enough to get it reported to kerneloops.org, or is
> more tweaking necessary?
>   

Well, no, I was thinking about it would give a clue about what went
wrong.  If you see the spew out of the pm code, then it was likely
suspend/resume.  If it's out of the timer function then it won't tell
you much other than "something else".  If we put it elsewhere (even if
temporarily for detecting other suspected corruption symptoms), then it
will be useful for distinguishing those.

An alternative would be to pass a flag to say whether a backtrace would
likely be useful.  Or wrap it in a macro to provide file/line info.

>> +}
>> +
>> +static int set_bios_corruption_check(char *arg)
>> +{
>> +	char *end;
>> +
>> +	bios_corruption_check = simple_strtol(arg, &end, 10);
>> +
>> +	return (*end == 0) ? 0 : -EINVAL;
>> +}
>> +early_param("bios_corruption_check", set_bios_corruption_check);
>> +#endif
>> +
>> +/*
>>   * Determine if we were loaded by an EFI loader.  If so, then we have also been
>>   * passed the efi memmap, systab, etc., so we should use these data structures
>>   * for initialization.  Note, the efi init code path is determined by the
>> @@ -766,6 +848,10 @@
>>  		max_low_pfn = max_pfn;
>>  
>>  	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
>> +#endif
>> +
>> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
>> +	setup_bios_corruption_check();
>>  #endif
>>  
>>  	/* max_pfn_mapped is updated here */
>> ===================================================================
>> --- a/arch/x86/mm/fault.c
>> +++ b/arch/x86/mm/fault.c
>> @@ -864,6 +864,8 @@
>>   * Oops. The kernel tried to access some bad page. We'll have to
>>   * terminate things with extreme prejudice.
>>   */
>> +	check_for_bios_corruption();
>> +
>>  #ifdef CONFIG_X86_32
>>  	bust_spinlocks(1);
>>  #else
>> ===================================================================
>> --- a/drivers/base/power/main.c
>> +++ b/drivers/base/power/main.c
>> @@ -254,6 +254,7 @@
>>  
>>  static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
>>  {
>> +	check_for_bios_corruption();
>>  	dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
>>  		((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
>>  		", may wakeup" : "");
>> ===================================================================
>> --- a/include/linux/kernel.h
>> +++ b/include/linux/kernel.h
>> @@ -246,6 +246,18 @@
>>  extern void add_taint(unsigned);
>>  extern int root_mountflags;
>>  
>> +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
>> +/*
>> + * This is obviously not a great place for this, but we want to be
>> + * able to scatter it around anywhere in the kernel.
>> + */
>> +void check_for_bios_corruption(void);
>> +#else
>> +static inline void check_for_bios_corruption(void)
>> +{
>> +}
>> +#endif
>> +
>>  /* Values used for system_state */
>>  extern enum system_states {
>>  	SYSTEM_BOOTING,
>>     
>
> I'll give this or something like it a try on my machines later on,
> 32 and 64, to see if anything comes up which hasn't caused any
> visible problem before (but will either need to do suspend+resume
> on machines not tried before, or add in the periodic test).
>
> I do wonder whether for 2.6.27 we should simply go back to the
> previous kernel pagetable layout, so there's no regression while
> we investigate the issue further.  But I don't recall how well-
> defined that layout was, and whether your perturbation was just
> in that one patch or not.
>   

That would break Xen.  I needed to do the pagetable reuse to make sure
that the Xen domain-builder pagetables are preserved.

> Is this the right moment for me to mention again that I'm not sure
> your reuse of existing pagetables was quite right anyway: NX being
> excluded from level2_ident_pgt, but wanted in the direct map?
>   

We could add NX.  What's the behaviour of setting NX in a
non-NX-supporting CPU?  I don't think it would trigger a "reserved bit"
exception (the other high pte flags don't).  Or failing that, we could
mask out NX once we've worked out the CPU doesn't support it (at the
same time it relocates the pagetables to the kernel's load-time address).

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 13:21               ` Hugh Dickins
@ 2008-08-29 16:30                 ` Rafał Miłecki
  2008-08-29 17:39                 ` Rafał Miłecki
  1 sibling, 0 replies; 35+ messages in thread
From: Rafał Miłecki @ 2008-08-29 16:30 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Alan Jenkins, Alan Cox, Jeremy Fitzhardinge, Yinghai Lu,
	Ingo Molnar, H. Peter Anvin, Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 877 bytes --]

2008/8/29 Hugh Dickins <hugh@veritas.com>:> On Fri, 29 Aug 2008, Alan Jenkins wrote:>> Rafał Miłecki wrote:>> >>> > I tried your patch anyway (after applying Jeremy's patch of course)>> > and it doesn't seem to work. The only output is:>> > scanning 2 areas for BIOS corruption>> > after using s2ram. I do not get any>> > Corrupted low memory at*>> >>> It seemed to work for me.  Did you remember to plug HDMI to trigger the>> corruption before you used s2ram?>> I hope that's what got missed.
Ah, stupid mistake, sorry! Indeed, using HDMI and s2ram later works fine:
Corrupted low memory at ffff88000000be98 (be98 phys) = b02a000400000000 [<ffffffff8020fc9c>] check_for_bios_corruption+0x94/0xa0
-- RafaÅ‚ MiÅ‚eckiÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  8:14 ` Hugh Dickins
  2008-08-29 14:48   ` Jeremy Fitzhardinge
@ 2008-08-29 17:02   ` H. Peter Anvin
  2008-08-29 17:03   ` H. Peter Anvin
  2 siblings, 0 replies; 35+ messages in thread
From: H. Peter Anvin @ 2008-08-29 17:02 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Jeremy Fitzhardinge, Ingo Molnar, Rafał Miłecki,
	Alan Jenkins, Linux Kernel Mailing List

Hugh Dickins wrote:
> 
> hpa introduced the 64k idea, and we've all been repeating it;
> but I've not heard the reasoning behind it.  Is it a fundamental
> addressing limitation within the BIOS memory model?  Or a case
> that Windows treats the bottom 64k as scratch, so BIOS testers
> won't notice if they corrupt it?
> 
> The two instances of corruption we've been studying have indeed
> been below 64k (one in page 8 and one in page 11), but that's
> because they were both recognizable corruptions of direct map PMDs.
> 
> If there is not a very strong justification for that 64k limit,
> then I don't think this approach will be very useful, and we should
> simply continue to rely on analyzing corruption when it appears, and
> recommend memmap= as a way of avoiding it once analyzed.  If there
> is a strong justification for it, please dispel my ignorance!
> 

The 64K number was empirical, of course.  The bottom 64K is somewhat 
special, however, in that it is what you can address in real mode (as 
opposed to big real mode) with your segments parked at zero, so you end 
up with something approaching a flat real mode.  Especially the first 
32K (below 0x7c00) are frequently used by various BIOS items, but I 
believe the corruption observed was at 0x8000, so it's beyond even this 
first barrier.

Obviously, it's extremely hard to predict where BIOS vendors will have 
choosen to scribble, but the observations in this particular case seemed 
to finger this particular area.

	-hpa

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  8:14 ` Hugh Dickins
  2008-08-29 14:48   ` Jeremy Fitzhardinge
  2008-08-29 17:02   ` H. Peter Anvin
@ 2008-08-29 17:03   ` H. Peter Anvin
  2 siblings, 0 replies; 35+ messages in thread
From: H. Peter Anvin @ 2008-08-29 17:03 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Jeremy Fitzhardinge, Ingo Molnar, Rafał Miłecki,
	Alan Jenkins, Linux Kernel Mailing List

Hugh Dickins wrote:
> 
> hpa introduced the 64k idea, and we've all been repeating it;
> but I've not heard the reasoning behind it.  Is it a fundamental
> addressing limitation within the BIOS memory model?  Or a case
> that Windows treats the bottom 64k as scratch, so BIOS testers
> won't notice if they corrupt it?
> 

I should point out that I have seen one particular bug quite a few times 
poking around with boot loaders: the BIOS accesses memory at an 
otherwise valid address, but with the segment base set to either zero or 
0x400 instead of whatever it should have been.

	-hpa

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 14:48   ` Jeremy Fitzhardinge
@ 2008-08-29 17:20     ` H. Peter Anvin
  2008-09-08 11:35     ` Hugh Dickins
  1 sibling, 0 replies; 35+ messages in thread
From: H. Peter Anvin @ 2008-08-29 17:20 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Hugh Dickins, Ingo Molnar, Rafał Miłecki, Alan Jenkins,
	Linux Kernel Mailing List

Jeremy Fitzhardinge wrote:
> 
> It would be easy to add another kernel parameter to select how much
> memory to reserve for corruption checking.  64k?  1M?
> 

Or just make the boot_corruption_check parameter take a range instead of 
a boolean.

	-hpa

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 13:21               ` Hugh Dickins
  2008-08-29 16:30                 ` Rafał Miłecki
@ 2008-08-29 17:39                 ` Rafał Miłecki
  2008-09-04 19:42                   ` Rafał Miłecki
  1 sibling, 1 reply; 35+ messages in thread
From: Rafał Miłecki @ 2008-08-29 17:39 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Alan Jenkins, Alan Cox, Jeremy Fitzhardinge, Yinghai Lu,
	Ingo Molnar, H. Peter Anvin, Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 1022 bytes --]

2008/8/29 Hugh Dickins <hugh@veritas.com>:> Here's my version of Jeremy's patch, that I've now tested on my machines,> as x86_32 and as x86_64.  It addresses none of the points Alan Cox made,> and it stays silent for me, even after suspend+resume, unless I actually> introduce corruption myself.  Omits Jeremy's check in fault.c, but does> a check every minute, so should soon detect Rafał's HDMI corruption> without any need to suspend+resume.
Your periodic test works fine:
Corrupted low memory at ffff88000000be9c (be9c phys) = b02a0004 <IRQ>  [<ffffffff8020fc9b>] check_for_bios_corruption+0x93/0x9f [<ffffffff8020fca7>] ? periodic_check_for_corruption+0x0/0x25 [<ffffffff8020fcb0>] periodic_check_for_corruption+0x9/0x25
By the way I confirmed this bug on Sony Vaio FW11M (my one is FW11S).Probably more machines from FW11* are affected.
-- RafaÅ‚ MiÅ‚eckiÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29  3:28   ` Jeremy Fitzhardinge
  2008-08-29  9:25     ` Alan Cox
@ 2008-08-29 20:31     ` Kasper Sandberg
  2008-08-30  1:15       ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 35+ messages in thread
From: Kasper Sandberg @ 2008-08-29 20:31 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Yinghai Lu, Ingo Molnar, Rafał Miłecki, Alan Jenkins,
	Hugh Dickens, H. Peter Anvin, Linux Kernel Mailing List

On Thu, 2008-08-28 at 20:28 -0700, Jeremy Fitzhardinge wrote:
> Yinghai Lu wrote:
> > On Thu, Aug 28, 2008 at 12:52 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
<snip>
> Yeah, OK, but I think it should default to ON for now.  The problem is
> that we had two very different systems (Sony Vaio and Intel desktop)
> exhibit the same problem in two different ways.  These systems worked

This is very interresting. I have a gigabyte X48 board, and everything
works perfect, however, in memtest86+ i get thousands of errors in the
range 0-0.9mb. I am certain my ram is fine, i've tried it in other
computers, and reversed. I've run dozens of stress tests within my
booted linux, no trouble, also the userspace memtester (allthough im
aware it wont actually ever get to grab offending addresses).

Any thoughts?

> fine until we slightly changed the way that pagetable memory was
> allocated.  We just don't know how many other systems are doing this
> kind of subtle corruption of low memory.
> 
>     J
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 20:31     ` Kasper Sandberg
@ 2008-08-30  1:15       ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-30  1:15 UTC (permalink / raw)
  To: Kasper Sandberg
  Cc: Yinghai Lu, Ingo Molnar, Rafa? Mi?ecki, Alan Jenkins,
	Hugh Dickens, H. Peter Anvin, Linux Kernel Mailing List

Kasper Sandberg wrote:
> On Thu, 2008-08-28 at 20:28 -0700, Jeremy Fitzhardinge wrote:
>   
>> Yinghai Lu wrote:
>>     
>>> On Thu, Aug 28, 2008 at 12:52 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>>>       
> <snip>
>   
>> Yeah, OK, but I think it should default to ON for now.  The problem is
>> that we had two very different systems (Sony Vaio and Intel desktop)
>> exhibit the same problem in two different ways.  These systems worked
>>     
>
> This is very interresting. I have a gigabyte X48 board, and everything
> works perfect, however, in memtest86+ i get thousands of errors in the
> range 0-0.9mb. I am certain my ram is fine, i've tried it in other
> computers, and reversed. I've run dozens of stress tests within my
> booted linux, no trouble, also the userspace memtester (allthough im
> aware it wont actually ever get to grab offending addresses).
>
> Any thoughts?
>   

I would expect you'd get crashes if you had massive corruption in that
area.  By default it's being used for the kernel pagetables, so stomping
on them will cause problems.  In the cases we've been looking at so far,
it's only been a word or two being stomped.

The low 1Mbyte is used as general memory, so corruption over the whole
area will have widespread effects.

But try this patch (Hugh's version) and see what happens.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 17:39                 ` Rafał Miłecki
@ 2008-09-04 19:42                   ` Rafał Miłecki
  2008-09-04 20:23                     ` Hugh Dickins
  0 siblings, 1 reply; 35+ messages in thread
From: Rafał Miłecki @ 2008-09-04 19:42 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Alan Jenkins, Alan Cox, Jeremy Fitzhardinge, Yinghai Lu,
	Ingo Molnar, H. Peter Anvin, Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 1192 bytes --]

2008/8/29 Rafał Miłecki <zajec5@gmail.com>:> 2008/8/29 Hugh Dickins <hugh@veritas.com>:>> Here's my version of Jeremy's patch, that I've now tested on my machines,>> as x86_32 and as x86_64.  It addresses none of the points Alan Cox made,>> and it stays silent for me, even after suspend+resume, unless I actually>> introduce corruption myself.  Omits Jeremy's check in fault.c, but does>> a check every minute, so should soon detect Rafał's HDMI corruption>> without any need to suspend+resume.>> Your periodic test works fine:>> Corrupted low memory at ffff88000000be9c (be9c phys) = b02a0004>  <IRQ>  [<ffffffff8020fc9b>] check_for_bios_corruption+0x93/0x9f>  [<ffffffff8020fca7>] ? periodic_check_for_corruption+0x0/0x25>  [<ffffffff8020fcb0>] periodic_check_for_corruption+0x9/0x25>> By the way I confirmed this bug on Sony Vaio FW11M (my one is FW11S).> Probably more machines from FW11* are affected.
If this patch is known to work fine for Sony Vaio FW* and Alan'smachine, could it go mainline somehow?
-- RafaÅ‚ MiÅ‚eckiÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-09-04 19:42                   ` Rafał Miłecki
@ 2008-09-04 20:23                     ` Hugh Dickins
  2008-09-04 23:04                       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 35+ messages in thread
From: Hugh Dickins @ 2008-09-04 20:23 UTC (permalink / raw)
  To: Rafał Miłecki
  Cc: Alan Jenkins, Alan Cox, Jeremy Fitzhardinge, Yinghai Lu,
	Ingo Molnar, H. Peter Anvin, Linux Kernel Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2172 bytes --]

On Thu, 4 Sep 2008, Rafał Miłecki wrote:
> > 2008/8/29 Rafał Miłecki <zajec5@gmail.com>:
> > 2008/8/29 Hugh Dickins <hugh@veritas.com>:
> >> Here's my version of Jeremy's patch, that I've now tested on my machines,
> >> as x86_32 and as x86_64.  It addresses none of the points Alan Cox made,
> >> and it stays silent for me, even after suspend+resume, unless I actually
> >> introduce corruption myself.  Omits Jeremy's check in fault.c, but does
> >> a check every minute, so should soon detect Rafał's HDMI corruption
> >> without any need to suspend+resume.
> >
> > Your periodic test works fine:
> >
> > Corrupted low memory at ffff88000000be9c (be9c phys) = b02a0004
> >  <IRQ>  [<ffffffff8020fc9b>] check_for_bios_corruption+0x93/0x9f
> >  [<ffffffff8020fca7>] ? periodic_check_for_corruption+0x0/0x25
> >  [<ffffffff8020fcb0>] periodic_check_for_corruption+0x9/0x25
> >
> > By the way I confirmed this bug on Sony Vaio FW11M (my one is FW11S).
> > Probably more machines from FW11* are affected.
> 
> If this patch is known to work fine for Sony Vaio FW* and Alan's
> machine, could it go mainline somehow?

Well.

Thanks for the prod, and I'm certainly remiss for not following
up sooner.  But I'm really not at all keen on such a patch going
into mainline myself.

It's an interesting experiment, and I'd be happy to see such a patch
(adjusted to make sure output goes to kerneloops.org) spending a little
while in Fedora Rawhide (who'd be the right contact for that?).

But so far as mainline goes, I share Alan Cox's opinion that we should
not be chopping pages out of every x86 user's memory, just because a
couple of machines with faulty BIOSes have been observed.

Particularly now it's evident that the 64kB "limit" is no more than a
reflection of where the directmap pagetable changes have caught such
corruption.

If lots more such corruptions are reported, of course I would change
my position; but those bad directmap PMD crashes are themselves quite
recognizable now we know to look out for them.

I would prefer you both to use the minimal memmap= solutions for now;
but others may disagree.

Hugh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-09-04 20:23                     ` Hugh Dickins
@ 2008-09-04 23:04                       ` Jeremy Fitzhardinge
  2008-09-06 18:09                         ` Ingo Molnar
  0 siblings, 1 reply; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-09-04 23:04 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Rafał Miłecki, Alan Jenkins, Alan Cox, Yinghai Lu,
	Ingo Molnar, H. Peter Anvin, Linux Kernel Mailing List,
	Dave Jones

Hugh Dickins wrote:
> Well.
>
> Thanks for the prod, and I'm certainly remiss for not following
> up sooner.  But I'm really not at all keen on such a patch going
> into mainline myself.
>   

I have my original patch with your changes as a followup patch sitting
in my queue.  I was planning on sending it in the next day or so.  I was
planning on adding the memory size parameter too.

> It's an interesting experiment, and I'd be happy to see such a patch
> (adjusted to make sure output goes to kerneloops.org) spending a little
> while in Fedora Rawhide (who'd be the right contact for that?).
>   

Dave Jones?  He used to do it, at least.  I guess a WARN_ON() would get
picked up by kerneloops.

> But so far as mainline goes, I share Alan Cox's opinion that we should
> not be chopping pages out of every x86 user's memory, just because a
> couple of machines with faulty BIOSes have been observed.
>   

I think that's a worthwhile cost for -rc.  We can fix it up (ie, make it
a real config option, defaulting off) for release once we're happy that
we understand the scope of the problem.

> Particularly now it's evident that the 64kB "limit" is no more than a
> reflection of where the directmap pagetable changes have caught such
> corruption.
>   

We should definitely make the kernel parameter set the banned memory
size so we can experiment with different cutoffs.

> If lots more such corruptions are reported, of course I would change
> my position; but those bad directmap PMD crashes are themselves quite
> recognizable now we know to look out for them.
>
> I would prefer you both to use the minimal memmap= solutions for now;
> but others may disagree.
>   

The fact that we're seeing this problem in two completely different
systems with different BIOSes and everything else makes me worried that
this is quite widespread.  It's only the persistence and diligence of
our bug reporters that we managed to work out that they're the same
problem.  How many other people are getting strange crashes and haven't
managed to correlate it any particular BIOS interaction?  Or just happen
to be corrupting memory we don't care about right now, but is only a
small code change or link order change away from disaster?

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-09-04 23:04                       ` Jeremy Fitzhardinge
@ 2008-09-06 18:09                         ` Ingo Molnar
  0 siblings, 0 replies; 35+ messages in thread
From: Ingo Molnar @ 2008-09-06 18:09 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Hugh Dickins, Rafał Miłecki, Alan Jenkins, Alan Cox,
	Yinghai Lu, H. Peter Anvin, Linux Kernel Mailing List,
	Dave Jones


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> > I would prefer you both to use the minimal memmap= solutions for 
> > now; but others may disagree.
> 
> The fact that we're seeing this problem in two completely different 
> systems with different BIOSes and everything else makes me worried 
> that this is quite widespread.  It's only the persistence and 
> diligence of our bug reporters that we managed to work out that 
> they're the same problem.  How many other people are getting strange 
> crashes and haven't managed to correlate it any particular BIOS 
> interaction?  Or just happen to be corrupting memory we don't care 
> about right now, but is only a small code change or link order change 
> away from disaster?

please put this all behind a .config debug option that distros can turn 
on/off. Also, when it's enabled in the .config, there should be another 
.config option that marks it disabled by default but it can be enabled 
via a boot parameter.

Distro debug kernels will most likely enable the .config - even release 
kernels might enable it it, with default off - users can enable the boot 
switch if they suspect something, without having to build a new kernel.

	Ingo

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-08-29 14:48   ` Jeremy Fitzhardinge
  2008-08-29 17:20     ` H. Peter Anvin
@ 2008-09-08 11:35     ` Hugh Dickins
  2008-09-08 17:16       ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 35+ messages in thread
From: Hugh Dickins @ 2008-09-08 11:35 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Rafał Miłecki, Alan Jenkins,
	H. Peter Anvin, Linux Kernel Mailing List

On Fri, 29 Aug 2008, Jeremy Fitzhardinge wrote:
> Hugh Dickins wrote:
> 
> > Is this the right moment for me to mention again that I'm not sure
> > your reuse of existing pagetables was quite right anyway: NX being
> > excluded from level2_ident_pgt, but wanted in the direct map?
> 
> We could add NX.  What's the behaviour of setting NX in a
> non-NX-supporting CPU?  I don't think it would trigger a "reserved bit"
> exception (the other high pte flags don't).  Or failing that, we could
> mask out NX once we've worked out the CPU doesn't support it (at the
> same time it relocates the pagetables to the kernel's load-time address).

I've no experience of what happens if NX is set to a non-NX-supporting
CPU, so can't advise on that at all.  But I think you're looking at
it the wrong way round - or else I am.

Here's the declaration and comment on level2_ident_pgt in head_64.S:

NEXT_PAGE(level2_ident_pgt)
	/* Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)

(The "Since I easily can" comment is there because it used to map
only a subset needed for early kernel startup, not the whole 1G.)

So it's very intentionally leaving NX out there.  I believe the
level2_ident_pgt page appears twice or more in the pagetable layout,
used to map two or more areas of virtual address space - once to
provide the direct map at ffff880000000000 and once to provide
the kernel image virtual mapping at ffffffff80200000.

I think we don't want NX on Linux kernel text ;-?

Before your 2.6.27-rc changes, init_memory_mapping subsequently
replaced the direct map usage by a separately constructed pagetable,
similar to it but with NX set throughout.  After your 2.6.27-rc
changes, level2_ident_pgt is found there already so left untouched -
leaving NX out of that first 1G of the direct map forever (when CPA
splits it up, the smaller pages inherit the lack of NX too).

I noticed when changing /proc/meminfo to show DirectMap in kB,
and tried to fix it, but only gave myself a non-booting system
(I probably shrank my direct map to 0 while implicitly using it).
And at that stage I didn't realize at all that it was a recent
regression arising from your mods.

I'm reluctant to delve in there again at present, and unsure
what the right fix should be: perhaps the code which checks if
an entry is already there, should check if it has the desired
flags?

Hugh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-09-08 11:35     ` Hugh Dickins
@ 2008-09-08 17:16       ` Jeremy Fitzhardinge
  2008-09-08 19:14         ` Hugh Dickins
  0 siblings, 1 reply; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-09-08 17:16 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Ingo Molnar, Rafa? Mi?ecki, Alan Jenkins, H. Peter Anvin,
	Linux Kernel Mailing List

Hugh Dickins wrote:
> On Fri, 29 Aug 2008, Jeremy Fitzhardinge wrote:
>   
>> Hugh Dickins wrote:
>>
>>     
>>> Is this the right moment for me to mention again that I'm not sure
>>> your reuse of existing pagetables was quite right anyway: NX being
>>> excluded from level2_ident_pgt, but wanted in the direct map?
>>>       
>> We could add NX.  What's the behaviour of setting NX in a
>> non-NX-supporting CPU?  I don't think it would trigger a "reserved bit"
>> exception (the other high pte flags don't).  Or failing that, we could
>> mask out NX once we've worked out the CPU doesn't support it (at the
>> same time it relocates the pagetables to the kernel's load-time address).
>>     
>
> I've no experience of what happens if NX is set to a non-NX-supporting
> CPU, so can't advise on that at all.  But I think you're looking at
> it the wrong way round - or else I am.
>
> Here's the declaration and comment on level2_ident_pgt in head_64.S:
>
> NEXT_PAGE(level2_ident_pgt)
> 	/* Since I easily can, map the first 1G.
> 	 * Don't set NX because code runs from these pages.
> 	 */
> 	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
>
> (The "Since I easily can" comment is there because it used to map
> only a subset needed for early kernel startup, not the whole 1G.)
>
> So it's very intentionally leaving NX out there.  I believe the
> level2_ident_pgt page appears twice or more in the pagetable layout,
> used to map two or more areas of virtual address space - once to
> provide the direct map at ffff880000000000 and once to provide
> the kernel image virtual mapping at ffffffff80200000.
>   

Hm, yes, I see what you mean.

> I think we don't want NX on Linux kernel text ;-?
>   

Might be problematic.

> Before your 2.6.27-rc changes, init_memory_mapping subsequently
> replaced the direct map usage by a separately constructed pagetable,
> similar to it but with NX set throughout.  After your 2.6.27-rc
> changes, level2_ident_pgt is found there already so left untouched -
> leaving NX out of that first 1G of the direct map forever (when CPA
> splits it up, the smaller pages inherit the lack of NX too).
>   

Well, if we never want the direct map to be non-executable (which I
think would be OK, since all the code is either core kernel or modules
which are mapped elsewhere), then we can set NX on the level4 for the
linear mapping which will make everything below non-executable.

> I'm reluctant to delve in there again at present, and unsure
> what the right fix should be: perhaps the code which checks if
> an entry is already there, should check if it has the desired
> flags?

Well, the specific reason I made these changes was to make sure that
there was never more than one entry mapping any kernel page, so that you
can update the page permissions on a kernel page with just one update. 
This is pretty much a requirement for Xen, and very convenient at other
times.  Native will use either L2 or L3 mappings for the kernel and
linear space, and Xen uses L1 mappings, so if we break the aliasing at
the L2 level I can still keep the L1s aliased, but it seems simpler to
set NX on the linear mapping's L4.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-09-08 17:16       ` Jeremy Fitzhardinge
@ 2008-09-08 19:14         ` Hugh Dickins
  2008-09-08 19:45           ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 35+ messages in thread
From: Hugh Dickins @ 2008-09-08 19:14 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Rafa? Mi?ecki, Alan Jenkins, H. Peter Anvin,
	Linux Kernel Mailing List

On Mon, 8 Sep 2008, Jeremy Fitzhardinge wrote:
> 
> Well, if we never want the direct map to be non-executable (which I

One too many negatives?

> think would be OK, since all the code is either core kernel or modules
> which are mapped elsewhere),

I had thought so until just before replying.

> then we can set NX on the level4 for the
> linear mapping which will make everything below non-executable.

That would be much the neatest answer: I hadn't realized
that inheritance (perhaps I'm still living in early-i386 days,
when IIRC there was a bug in inheriting WP from higher levels).

But then I stumbled across static_protections() in pageattr.c
(takes both addr and pfn, latter seems weird), whose BIOS_BEGIN
and BIOS_END seem to echo the ISA_START_ADDR and ISA_END_ADDR
used by is_ISA_range() in ioremap.c.

And peering at the pagetables I've got here for that area of the
direct map in 2.6.26 x86_64, yes, I'm missing NX from 0xc0000 to
0xfffff (presumably nothing tried to ioremap 0xa0000 to 0xbffff).

A simple answer might be to go the way you suggest, but remove
the special casing from pageattr.c and ioremap.c; but I fear
that will slow something down, or introduce further bugs.

Hugh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC] x86: check for and defend against BIOS memory corruption
  2008-09-08 19:14         ` Hugh Dickins
@ 2008-09-08 19:45           ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 35+ messages in thread
From: Jeremy Fitzhardinge @ 2008-09-08 19:45 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Ingo Molnar, Rafa? Mi?ecki, Alan Jenkins, H. Peter Anvin,
	Linux Kernel Mailing List

Hugh Dickins wrote:
> On Mon, 8 Sep 2008, Jeremy Fitzhardinge wrote:
>   
>> Well, if we never want the direct map to be non-executable (which I
>>     
>
> One too many negatives?
>   

Yes.  I over-corrected in my pre-send proofread.

>> think would be OK, since all the code is either core kernel or modules
>> which are mapped elsewhere),
>>     
>
> I had thought so until just before replying.
>
>   
>> then we can set NX on the level4 for the
>> linear mapping which will make everything below non-executable.
>>     
>
> That would be much the neatest answer: I hadn't realized
> that inheritance (perhaps I'm still living in early-i386 days,
> when IIRC there was a bug in inheriting WP from higher levels).
>   

In PAE mode the top-level doesn't let you set inheritable permissions
flags.  On i386 it only works on levels 2 and 1, but 64-bit fixes it to
work on all levels.

> But then I stumbled across static_protections() in pageattr.c
> (takes both addr and pfn, latter seems weird),

Well, it wants to set the mappings for some physical memory in the case
of BIOS and RO data, but only affect the kernel mappings in a particular
virtual range (so the linear mapping of the kernel is still NX).   (No
need for it to be inline though.)

But it does seem it wants the BIOS to be executable when you're using it
for PCI space.  A possible fix in that case is to create a separate
executable mapping of BIOS space.

That said, PCI_GOBIOS is only used on 32-bit anyway, so it's moot (its
only use is to define CONFIG_PCI_BIOS, and it depends on X86_32).

> A simple answer might be to go the way you suggest, but remove
> the special casing from pageattr.c and ioremap.c; but I fear
> that will slow something down, or introduce further bugs.
>   

It looks to me like it won't matter either way.

    J

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2008-09-08 19:45 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-28 19:52 [PATCH RFC] x86: check for and defend against BIOS memory corruption Jeremy Fitzhardinge
2008-08-29  1:49 ` Yinghai Lu
2008-08-29  3:28   ` Jeremy Fitzhardinge
2008-08-29  9:25     ` Alan Cox
2008-08-29 10:13       ` Rafał Miłecki
2008-08-29 10:06         ` Alan Cox
2008-08-29 10:24         ` Hugh Dickins
2008-08-29 11:54           ` Rafał Miłecki
2008-08-29 12:09             ` Alan Jenkins
2008-08-29 13:21               ` Hugh Dickins
2008-08-29 16:30                 ` Rafał Miłecki
2008-08-29 17:39                 ` Rafał Miłecki
2008-09-04 19:42                   ` Rafał Miłecki
2008-09-04 20:23                     ` Hugh Dickins
2008-09-04 23:04                       ` Jeremy Fitzhardinge
2008-09-06 18:09                         ` Ingo Molnar
2008-08-29 14:08           ` Jeremy Fitzhardinge
2008-08-29 14:18       ` Jeremy Fitzhardinge
2008-08-29 20:31     ` Kasper Sandberg
2008-08-30  1:15       ` Jeremy Fitzhardinge
2008-08-29  6:20 ` Rafał Miłecki
2008-08-29  6:45   ` Ingo Molnar
2008-08-29  7:21     ` Jeremy Fitzhardinge
2008-08-29  7:30       ` Ingo Molnar
2008-08-29  8:02         ` Jeremy Fitzhardinge
2008-08-29  7:22   ` Jeremy Fitzhardinge
2008-08-29  8:14 ` Hugh Dickins
2008-08-29 14:48   ` Jeremy Fitzhardinge
2008-08-29 17:20     ` H. Peter Anvin
2008-09-08 11:35     ` Hugh Dickins
2008-09-08 17:16       ` Jeremy Fitzhardinge
2008-09-08 19:14         ` Hugh Dickins
2008-09-08 19:45           ` Jeremy Fitzhardinge
2008-08-29 17:02   ` H. Peter Anvin
2008-08-29 17:03   ` H. Peter Anvin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).