linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64
@ 2005-09-24 17:36 Rafael J. Wysocki
  2005-09-25 22:07 ` Pavel Machek
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-24 17:36 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andi Kleen, Andrew Morton

[The previous message has been sent accidentally without the patch]

Hi,

The following patch fixes Bug #4959.  It creates temporary page translation
tables located in the page frames that are not overwritten by swsusp while copying
the image.

The temporary page translation tables are generally based on the existing ones
with the exception that the mappings using 4KB pages are replaced with the
equivalent mappings that use 2MB pages only.  The temporary page tables are
only used for copying the image.

Please consider for applying.

Greetings,
Rafael


Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

Index: linux-2.6.14-rc2-git3/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc2-git3.orig/arch/x86_64/kernel/suspend.c	2005-09-23 18:20:41.000000000 +0200
+++ linux-2.6.14-rc2-git3/arch/x86_64/kernel/suspend.c	2005-09-23 18:21:48.000000000 +0200
@@ -11,6 +11,17 @@
 #include <linux/smp.h>
 #include <linux/suspend.h>
 #include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/* Defined in kernel/power/swsusp.c */
+extern unsigned long get_usable_page(unsigned gfp_mask);
+
+pgd_t *temp_level4_pgt;
+
+#define __PGTABLE_MASK		0x000ffffffffff000UL
+#define __PGTABLE_PSE_MASK	0x000fffffffe00000UL
 
 struct saved_context saved_context;
 
@@ -140,4 +151,69 @@
 
 }
 
+static int __duplicate_page_tables(pgd_t *src_pgd, pgd_t *pgd, unsigned long map_offset)
+{
+	pud_t *src_pud, *pud;
+	pmd_t *src_pmd, *pmd;
+	pte_t *src_pte;
+	int i, j, k;
+
+	pr_debug("Duplicating pagetables for the map 0x%016lx at 0x%016lx\n",
+		map_offset, (unsigned long)src_pgd);
+	i = pgd_index(map_offset);
+	j = pud_index(map_offset);
+	k = pmd_index(map_offset);
+	src_pgd += i;
+	pgd += i;
+	for (; pgd_val(*src_pgd) && i < PTRS_PER_PGD; i++, src_pgd++, pgd++) {
+		pud = (pud_t *)get_usable_page(GFP_ATOMIC);
+		if (!pud)
+			return -ENOMEM;
+		pgd_val(*pgd) = (pgd_val(*src_pgd) & ~__PGTABLE_MASK) |
+			__pa((unsigned long)pud);
+		pud += j;
+		src_pud = (pud_t *)__va(pgd_val(*src_pgd) & __PGTABLE_MASK) + j;
+		for (; pud_val(*src_pud) && j < PTRS_PER_PUD; j++, src_pud++, pud++) {
+			pmd = (pmd_t *)get_usable_page(GFP_ATOMIC);
+			if (!pmd)
+				return -ENOMEM;
+			pud_val(*pud) = (pud_val(*src_pud) & ~__PGTABLE_MASK) |
+				__pa((unsigned long)pmd);
+			pmd += k;
+			src_pmd = (pmd_t *)__va(pud_val(*src_pud) & __PGTABLE_MASK) + k;
+			for (; pmd_val(*src_pmd) && k < PTRS_PER_PMD; k++, src_pmd++, pmd++)
+				if (pmd_val(*src_pmd) & _PAGE_PSE) /* 2MB page */
+					pmd_val(*pmd) = pmd_val(*src_pmd);
+				else { /* 4KB page table -> 2MB page */
+					src_pte = __va(pmd_val(*src_pmd) & __PGTABLE_MASK);
+					pmd_val(*pmd) = ((pmd_val(*src_pmd) & ~__PGTABLE_MASK) |
+						_PAGE_PSE) |
+						(pte_val(*src_pte) & __PGTABLE_PSE_MASK);
+				}
+			k = 0;
+		}
+		j = 0;
+	}
+	return 0;
+}
 
+void duplicate_page_tables(void)
+{
+	int result = 0;
+
+	temp_level4_pgt = (pgd_t *)get_usable_page(GFP_ATOMIC);
+
+	if (!temp_level4_pgt)
+		result = -ENOMEM;
+
+	if (!result)
+		result = __duplicate_page_tables(init_level4_pgt, temp_level4_pgt,
+				PAGE_OFFSET);
+
+	if (!result)
+		result = __duplicate_page_tables(init_level4_pgt, temp_level4_pgt,
+				__START_KERNEL_map);
+
+	if (result)
+		panic("No room for temporary page translation tables!\n");
+}
Index: linux-2.6.14-rc2-git3/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc2-git3.orig/kernel/power/swsusp.c	2005-09-23 18:20:41.000000000 +0200
+++ linux-2.6.14-rc2-git3/kernel/power/swsusp.c	2005-09-23 18:21:48.000000000 +0200
@@ -1092,7 +1092,7 @@
 	*eaten_memory = c;
 }
 
-static unsigned long get_usable_page(unsigned gfp_mask)
+unsigned long get_usable_page(unsigned gfp_mask)
 {
 	unsigned long m;
 
@@ -1478,11 +1478,12 @@
 	/* Allocate memory for the image and read the data from swap */
 
 	error = check_pagedir(pagedir_nosave);
-	free_eaten_memory();
+
 	if (!error)
 		error = data_read(pagedir_nosave);
 
 	if (error) { /* We fail cleanly */
+		free_eaten_memory();
 		for_each_pbe (p, pagedir_nosave)
 			if (p->address) {
 				free_page(p->address);
Index: linux-2.6.14-rc2-git3/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc2-git3.orig/arch/x86_64/kernel/suspend_asm.S	2005-09-23 18:20:41.000000000 +0200
+++ linux-2.6.14-rc2-git3/arch/x86_64/kernel/suspend_asm.S	2005-09-23 18:21:48.000000000 +0200
@@ -40,11 +40,14 @@
 	ret
 
 ENTRY(swsusp_arch_resume)
-	/* set up cr3 */	
-	leaq	init_level4_pgt(%rip),%rax
-	subq	$__START_KERNEL_map,%rax
-	movq	%rax,%cr3
+	call	duplicate_page_tables
 
+	/* switch to temporary page tables */
+	movq	$__PAGE_OFFSET, %rdx
+	movq	temp_level4_pgt(%rip), %rax
+	subq	%rdx, %rax
+	movq	%rax, %cr3
+	/* Flush TLB */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
 	andq	$~(1<<7), %rdx	# PGE
@@ -69,6 +72,10 @@
 	movq	pbe_next(%rdx), %rdx
 	jmp	loop
 done:
+	/* go back to the original page tables */
+	leaq	init_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
 	/* Flush TLB, including "global" things (vmalloc) */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx


-- 
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
		-- Lewis Carroll "Alice's Adventures in Wonderland"

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64
  2005-09-24 17:36 [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64 Rafael J. Wysocki
@ 2005-09-25 22:07 ` Pavel Machek
  2005-09-26 10:46   ` Rafael J. Wysocki
  2005-09-27  8:07 ` [PATCH][Fix] Fix Bug #4959 (take 2) Rafael J. Wysocki
  2005-09-28 10:29 ` [PATCH][Fix] Fix Bug #4959 (take 3) Rafael J. Wysocki
  2 siblings, 1 reply; 12+ messages in thread
From: Pavel Machek @ 2005-09-25 22:07 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: kernel list

Hi!

> The following patch fixes Bug #4959.  It creates temporary page translation
> tables located in the page frames that are not overwritten by swsusp while copying
> the image.
> 
> The temporary page translation tables are generally based on the existing ones
> with the exception that the mappings using 4KB pages are replaced with the
> equivalent mappings that use 2MB pages only.  The temporary page tables are
> only used for copying the image.

Would not it be simpler to create them from scratch? mm/init.c has
some handy functions, they should be applicable. [init_memory_mapping,
phys_pud_init]. Perhaps even initialize only simple direct mapping,
and place virt_to_phys() at strategic places?
								Pavel

> +static int __duplicate_page_tables(pgd_t *src_pgd, pgd_t *pgd, unsigned long map_offset)
> +{
> +	pud_t *src_pud, *pud;
> +	pmd_t *src_pmd, *pmd;
> +	pte_t *src_pte;
> +	int i, j, k;
> +
> +	pr_debug("Duplicating pagetables for the map 0x%016lx at 0x%016lx\n",
> +		map_offset, (unsigned long)src_pgd);
> +	i = pgd_index(map_offset);
> +	j = pud_index(map_offset);
> +	k = pmd_index(map_offset);
> +	src_pgd += i;
> +	pgd += i;
> +	for (; pgd_val(*src_pgd) && i < PTRS_PER_PGD; i++, src_pgd++, pgd++) {
> +		pud = (pud_t *)get_usable_page(GFP_ATOMIC);
> +		if (!pud)
> +			return -ENOMEM;
> +		pgd_val(*pgd) = (pgd_val(*src_pgd) & ~__PGTABLE_MASK) |
> +			__pa((unsigned long)pud);
> +		pud += j;
> +		src_pud = (pud_t *)__va(pgd_val(*src_pgd) & __PGTABLE_MASK) + j;
> +		for (; pud_val(*src_pud) && j < PTRS_PER_PUD; j++, src_pud++, pud++) {
> +			pmd = (pmd_t *)get_usable_page(GFP_ATOMIC);
> +			if (!pmd)
> +				return -ENOMEM;
> +			pud_val(*pud) = (pud_val(*src_pud) & ~__PGTABLE_MASK) |
> +				__pa((unsigned long)pmd);
> +			pmd += k;
> +			src_pmd = (pmd_t *)__va(pud_val(*src_pud) & __PGTABLE_MASK) + k;
> +			for (; pmd_val(*src_pmd) && k < PTRS_PER_PMD; k++, src_pmd++, pmd++)
> +				if (pmd_val(*src_pmd) & _PAGE_PSE) /* 2MB page */
> +					pmd_val(*pmd) = pmd_val(*src_pmd);
> +				else { /* 4KB page table -> 2MB page */
> +					src_pte = __va(pmd_val(*src_pmd) & __PGTABLE_MASK);
> +					pmd_val(*pmd) = ((pmd_val(*src_pmd) & ~__PGTABLE_MASK) |
> +						_PAGE_PSE) |
> +						(pte_val(*src_pte) & __PGTABLE_PSE_MASK);
> +				}
> +			k = 0;
> +		}
> +		j = 0;
> +	}
> +	return 0;
> +}

-- 
if you have sharp zaurus hardware you don't need... you know my address

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64
  2005-09-25 22:07 ` Pavel Machek
@ 2005-09-26 10:46   ` Rafael J. Wysocki
  2005-09-26 10:49     ` Pavel Machek
  0 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-26 10:46 UTC (permalink / raw)
  To: Pavel Machek; +Cc: kernel list, Andi Kleen

Hi,

On Monday, 26 of September 2005 00:07, Pavel Machek wrote:
> Hi!
> 
> > The following patch fixes Bug #4959.  It creates temporary page translation
> > tables located in the page frames that are not overwritten by swsusp while copying
> > the image.
> > 
> > The temporary page translation tables are generally based on the existing ones
> > with the exception that the mappings using 4KB pages are replaced with the
> > equivalent mappings that use 2MB pages only.  The temporary page tables are
> > only used for copying the image.
> 
> Would not it be simpler to create them from scratch? mm/init.c has
> some handy functions, they should be applicable. [init_memory_mapping,
> phys_pud_init]. Perhaps even initialize only simple direct mapping,
> and place virt_to_phys() at strategic places?

Perhaps, but the outcome would be very much the same.  The problem is to what
extent we can use the existing constructs, because I'd rather like to avoid the situation
in which any future changes to the initialization code would have to be replicated
in the swsusp code.  I'll have a look at that, but I'm not sure.  Also I don't really know
what the Andi's preferences are with this respect.

Anyway do you think that creating temporary page tables at the beginning of
swsusp_arch_resume() is a good idea?  If not, where do you think should they be
created?

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64
  2005-09-26 10:46   ` Rafael J. Wysocki
@ 2005-09-26 10:49     ` Pavel Machek
  0 siblings, 0 replies; 12+ messages in thread
From: Pavel Machek @ 2005-09-26 10:49 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: kernel list, Andi Kleen

Hi!

> > > The following patch fixes Bug #4959.  It creates temporary page translation
> > > tables located in the page frames that are not overwritten by swsusp while copying
> > > the image.
> > > 
> > > The temporary page translation tables are generally based on the existing ones
> > > with the exception that the mappings using 4KB pages are replaced with the
> > > equivalent mappings that use 2MB pages only.  The temporary page tables are
> > > only used for copying the image.
> > 
> > Would not it be simpler to create them from scratch? mm/init.c has
> > some handy functions, they should be applicable. [init_memory_mapping,
> > phys_pud_init]. Perhaps even initialize only simple direct mapping,
> > and place virt_to_phys() at strategic places?
> 
> Perhaps, but the outcome would be very much the same.  The problem is to what
> extent we can use the existing constructs, because I'd rather like to avoid the situation
> in which any future changes to the initialization code would have to be replicated
> in the swsusp code.  I'll have a look at that, but I'm not sure.  Also I don't really know
> what the Andi's preferences are with this respect.
> 
> Anyway do you think that creating temporary page tables at the beginning of
> swsusp_arch_resume() is a good idea?  If not, where do you think should they be
> created?

For i386, I simply let bootup code create them, and create "backup
copy" that is not damaged by any page splits... Result is in
mm/init.c:


#ifdef CONFIG_SOFTWARE_SUSPEND
/*
 * Swap suspend & friends need this for resume because things like the
intel-agp
 * driver might have split up a kernel 4MB mapping.
 */
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
        __attribute__ ((aligned (PAGE_SIZE)));

static inline void save_pg_dir(void)
{
        memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
#else
static inline void save_pg_dir(void)
{
}
#endif

It is probably not as simple for x86-64 case, but some bootup code
reusal still should be neccessary...
								Pavel
-- 
if you have sharp zaurus hardware you don't need... you know my address

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-24 17:36 [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64 Rafael J. Wysocki
  2005-09-25 22:07 ` Pavel Machek
@ 2005-09-27  8:07 ` Rafael J. Wysocki
  2005-09-27 13:32   ` Pavel Machek
  2005-09-28 10:29 ` [PATCH][Fix] Fix Bug #4959 (take 3) Rafael J. Wysocki
  2 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-27  8:07 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andi Kleen, Andrew Morton

Hi,

The following patch fixes Bug #4959.  For this purpose it creates temporary
page translation tables including the kernel mapping (reused) and the direct
mapping (created from scratch) and makes swsusp switch to these tables
right before the image is restored.

The code used to generate the direct mapping comes from arch/x86_64/mm/init.c.

Please consider for applying.

Greetings,
Rafael

PS
As you can see, I really would like this bug to go officially, as swsusp is hardly
usable on x86-64 with it, and I know some people using swsusp on this
architecture.  I just don't want them to be hurt by this bug.

NOTES:
(1) I'm quite sure that to fix the problem we need to use temporary page
translation tables that won't be modified in the process of copying the image.
(2) These page translation tables have to be present in memory before the
image is copied, so there are two possible ways in which they can be created:
	(a) in the startup kernel code that is executed before calling swsusp
	on resume, in which case they have to be marked with PG_nosave,
	(b) in swsusp, after the image has been loaded from disk (to set up
	the tables we need to know which pages will be overwritten while
	copying the image).
However, (a) is tricky, because it will only work if the tables are always located
at the same physical addresses, which I think would be quite difficult to achieve.
Moreover, such a code would have to be executed on every boot and the
temporary page tables would always be present in memory.
For this reason I decided to chose (b).
(3) To create the temporary page translation tables in swsusp I need to allocate
some memory pages and theoretically this operaction could fail.  Fortunately
the probability of this is extremely small, because the number of pages needed
is low (4 pages for a machine with 2GB of RAM) and we reserve 512 spare pages
on suspend (in principle they are reserved for I/O, but they should be available
at the time the temporary page tables are created).  Thus I think the memory
allocations here are completely safe.


Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend.c	2005-09-26 21:05:13.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c	2005-09-27 07:41:04.000000000 +0200
@@ -11,6 +11,17 @@
 #include <linux/smp.h>
 #include <linux/suspend.h>
 #include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/* Defined in kernel/power/swsusp.c */
+extern unsigned long get_usable_page(unsigned gfp_mask);
+extern void free_eaten_memory(void);
+/* Defined in arch/x86_64/kernel/suspend_asm.S */
+int restore_image(void);
+
+pgd_t *temp_level4_pgt;
 
 struct saved_context saved_context;
 
@@ -140,4 +151,128 @@
 
 }
 
+static void **pages;
+
+static inline void *__add_page(void)
+{
+	void **c;
+
+	c = (void **)get_usable_page(GFP_ATOMIC);
+	if (c) {
+		*c = pages;
+		pages = c;
+	}
+	return c;
+}
+
+static inline void *__next_page(void)
+{
+	void **c;
+
+	c = pages;
+	if (c) {
+		pages = *c;
+		*c = NULL;
+	}
+	return c;
+}
+
+/*
+ * Try to allocate as many usable pages as needed and daisy chain them.
+ * If one allocation fails, free the pages allocated so far
+ */
+static int alloc_usable_pages(unsigned long n)
+{
+	void *p;
+
+	pages = NULL;
+	do
+		if (!__add_page())
+			break;
+	while (--n);
+	if (n) {
+		p = __next_page();
+		while (p) {
+			free_page((unsigned long)p);
+			p = __next_page();
+		}
+		return -ENOMEM;
+	}
+	return 0;
+}
 
+static void phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+	long i, j;
+
+	i = pud_index(address);
+	pud = pud + i;
+	for (; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr;
+		pmd_t *pmd;
+
+		paddr = address + i*PUD_SIZE;
+		if (paddr >= end) {
+			for (; i < PTRS_PER_PUD; i++, pud++)
+				set_pud(pud, __pud(0));
+			break;
+		}
+
+		pmd = (pmd_t *)__next_page();
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+			unsigned long pe;
+
+			if (paddr >= end) {
+				for (; j < PTRS_PER_PMD; j++, pmd++)
+					set_pmd(pmd,  __pmd(0));
+				break;
+			}
+			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
+			pe &= __supported_pte_mask;
+			set_pmd(pmd, __pmd(pe));
+		}
+	}
+}
+
+static void set_up_temporary_mappings(void)
+{
+	unsigned long start, end, next;
+
+	temp_level4_pgt = (pgd_t *)__next_page();
+
+	/* It is safe to reuse the original kernel mapping */
+	set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+	/* Set up the direct mapping from scratch */
+	start = (unsigned long)pfn_to_kaddr(0);
+	end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+	for (; start < end; start = next) {
+		pud_t *pud = (pud_t *)__next_page();
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+		phys_pud_init(pud, __pa(start), __pa(next));
+		set_pgd(temp_level4_pgt + pgd_index(start),
+			mk_kernel_pgd(__pa(pud)));
+	}
+}
+
+int swsusp_arch_resume(void)
+{
+	unsigned long n;
+
+	n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT;
+	n += n / PTRS_PER_PUD + !!(n % PTRS_PER_PUD) + 1;
+	pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n);
+	if (alloc_usable_pages(n)) {
+		free_eaten_memory();
+		return -ENOMEM;
+	}
+	/* We have enough memory and from now on we cannot recover */
+	set_up_temporary_mappings();
+	restore_image();
+	return 0;
+}
Index: linux-2.6.14-rc2-git6/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/kernel/power/swsusp.c	2005-09-26 21:05:13.000000000 +0200
+++ linux-2.6.14-rc2-git6/kernel/power/swsusp.c	2005-09-26 22:13:21.000000000 +0200
@@ -1095,7 +1095,7 @@
 	*eaten_memory = c;
 }
 
-static unsigned long get_usable_page(unsigned gfp_mask)
+unsigned long get_usable_page(unsigned gfp_mask)
 {
 	unsigned long m;
 
@@ -1109,7 +1109,7 @@
 	return m;
 }
 
-static void free_eaten_memory(void)
+void free_eaten_memory(void)
 {
 	unsigned long m;
 	void **c;
@@ -1481,11 +1481,12 @@
 	/* Allocate memory for the image and read the data from swap */
 
 	error = check_pagedir(pagedir_nosave);
-	free_eaten_memory();
+
 	if (!error)
 		error = data_read(pagedir_nosave);
 
 	if (error) { /* We fail cleanly */
+		free_eaten_memory();
 		for_each_pbe (p, pagedir_nosave)
 			if (p->address) {
 				free_page(p->address);
Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend_asm.S	2005-09-26 21:05:13.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S	2005-09-27 00:33:35.000000000 +0200
@@ -39,12 +39,13 @@
 	call swsusp_save
 	ret
 
-ENTRY(swsusp_arch_resume)
-	/* set up cr3 */	
-	leaq	init_level4_pgt(%rip),%rax
-	subq	$__START_KERNEL_map,%rax
-	movq	%rax,%cr3
-
+ENTRY(restore_image)
+	/* switch to temporary page tables */
+	movq	$__PAGE_OFFSET, %rdx
+	movq	temp_level4_pgt(%rip), %rax
+	subq	%rdx, %rax
+	movq	%rax, %cr3
+	/* Flush TLB */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
 	andq	$~(1<<7), %rdx	# PGE
@@ -69,6 +70,10 @@
 	movq	pbe_next(%rdx), %rdx
 	jmp	loop
 done:
+	/* go back to the original page tables */
+	leaq	init_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
 	/* Flush TLB, including "global" things (vmalloc) */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-27  8:07 ` [PATCH][Fix] Fix Bug #4959 (take 2) Rafael J. Wysocki
@ 2005-09-27 13:32   ` Pavel Machek
  2005-09-27 21:00     ` Rafael J. Wysocki
  0 siblings, 1 reply; 12+ messages in thread
From: Pavel Machek @ 2005-09-27 13:32 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, LKML, Andi Kleen, Andrew Morton

Hi!

I do not really like new exports from swsusp.c, but I'm afraid
there's no way around.

> The following patch fixes Bug #4959.  For this purpose it creates temporary
> page translation tables including the kernel mapping (reused) and the direct
> mapping (created from scratch) and makes swsusp switch to these tables
> right before the image is restored.

Why do you need *two* mappings? Should not just kernel mapping be enough?

> NOTES:
> (1) I'm quite sure that to fix the problem we need to use temporary page
> translation tables that won't be modified in the process of copying the image.
> (2) These page translation tables have to be present in memory before the
> image is copied, so there are two possible ways in which they can be created:
> 	(a) in the startup kernel code that is executed before calling swsusp
> 	on resume, in which case they have to be marked with PG_nosave,
> 	(b) in swsusp, after the image has been loaded from disk (to set up
> 	the tables we need to know which pages will be overwritten while
> 	copying the image).
> However, (a) is tricky, because it will only work if the tables are always located
> at the same physical addresses, which I think would be quite difficult to achieve.

Why? Reserve ten pages for them... static char resume_page_tables[10*PAGE_SIZE] does not
sound that bad.

> Moreover, such a code would have to be executed on every boot and the
> temporary page tables would always be present in memory.

Yep, but I do not see that as a big problem.


-- 
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms         


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-27 13:32   ` Pavel Machek
@ 2005-09-27 21:00     ` Rafael J. Wysocki
  2005-09-27 21:08       ` Pavel Machek
  0 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-27 21:00 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Pavel Machek, LKML, Andi Kleen, Andrew Morton

Hi,

On Tuesday, 27 of September 2005 15:32, Pavel Machek wrote:
> Hi!
> 
> I do not really like new exports from swsusp.c, but I'm afraid
> there's no way around.

Well, there is one, if we use a static buffer, as you propose, since
in that case we will not need get_usable_pages() etc. outside of
swsusp.c.  The drawback of this, however, is that we will limit the
size of RAM for which it is possible to suspend (we need at least 1 page
for the PGD, 1 page for a PUD plus as many pages as there are GBs of
RAM for PMDs).  If we want to cover the huge-RAM cases, the buffer will
be too large for the average case, but otherwise some boxes will not
be able to suspend.

> > The following patch fixes Bug #4959.  For this purpose it creates temporary
> > page translation tables including the kernel mapping (reused) and the direct
> > mapping (created from scratch) and makes swsusp switch to these tables
> > right before the image is restored.
> 
> Why do you need *two* mappings? Should not just kernel mapping be enough?

The kernel mapping is for the kernel text.  The direct mapping maps the physical
RAM linearly to the set of virtual addresses starting at PAGE_OFFSET.

> > NOTES:
> > (1) I'm quite sure that to fix the problem we need to use temporary page
> > translation tables that won't be modified in the process of copying the image.
> > (2) These page translation tables have to be present in memory before the
> > image is copied, so there are two possible ways in which they can be created:
> > 	(a) in the startup kernel code that is executed before calling swsusp
> > 	on resume, in which case they have to be marked with PG_nosave,
> > 	(b) in swsusp, after the image has been loaded from disk (to set up
> > 	the tables we need to know which pages will be overwritten while
> > 	copying the image).
> > However, (a) is tricky, because it will only work if the tables are always located
> > at the same physical addresses, which I think would be quite difficult to achieve.
> 
> Why? Reserve ten pages for them... static char resume_page_tables[10*PAGE_SIZE] does not
> sound that bad.

That will allow us to suspend if there's no more that 8GB of RAM in the box.
Is it acceptable?

> > Moreover, such a code would have to be executed on every boot and the
> > temporary page tables would always be present in memory.
> 
> Yep, but I do not see that as a big problem.

OK

I can reserve the static buffer (10 pages) in suspend.c and mark it as nosave.
The code that creates the mappings can stay in suspend.c either except it
won't need to call get_usable_page() and free_eaten_memory() any more
(__next_page() can be changed to get pages from the static buffer instead
of allocating them).  The code can also be simplified a bit, as we assume that
there will be only one PGD entry in the direct mapping.

If that sounds good to you, please confirm.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-27 21:00     ` Rafael J. Wysocki
@ 2005-09-27 21:08       ` Pavel Machek
  2005-09-27 23:26         ` Rafael J. Wysocki
  0 siblings, 1 reply; 12+ messages in thread
From: Pavel Machek @ 2005-09-27 21:08 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: LKML, Andi Kleen, Andrew Morton

Hi!

> > I do not really like new exports from swsusp.c, but I'm afraid
> > there's no way around.
> 
> Well, there is one, if we use a static buffer, as you propose, since
> in that case we will not need get_usable_pages() etc. outside of
> swsusp.c.  The drawback of this, however, is that we will limit the
> size of RAM for which it is possible to suspend (we need at least 1 page
> for the PGD, 1 page for a PUD plus as many pages as there are GBs of
> RAM for PMDs).  If we want to cover the huge-RAM cases, the buffer will
> be too large for the average case, but otherwise some boxes will not
> be able to suspend.

1GB per 4K seems pretty much okay.

> > > The following patch fixes Bug #4959.  For this purpose it creates temporary
> > > page translation tables including the kernel mapping (reused) and the direct
> > > mapping (created from scratch) and makes swsusp switch to these tables
> > > right before the image is restored.
> > 
> > Why do you need *two* mappings? Should not just kernel mapping be enough?
> 
> The kernel mapping is for the kernel text.  The direct mapping maps the physical
> RAM linearly to the set of virtual addresses starting at
> PAGE_OFFSET.

Could not you just add phys_to_virt at some strategic place? 

> > Why? Reserve ten pages for them... static char resume_page_tables[10*PAGE_SIZE] does not
> > sound that bad.
> 
> That will allow us to suspend if there's no more that 8GB of RAM in the box.
> Is it acceptable?

I'd say so.

> > > Moreover, such a code would have to be executed on every boot and the
> > > temporary page tables would always be present in memory.
> > 
> > Yep, but I do not see that as a big problem.
> 
> OK
> 
> I can reserve the static buffer (10 pages) in suspend.c and mark it as nosave.
> The code that creates the mappings can stay in suspend.c either except it
> won't need to call get_usable_page() and free_eaten_memory() any more
> (__next_page() can be changed to get pages from the static buffer instead
> of allocating them).  The code can also be simplified a bit, as we assume that
> there will be only one PGD entry in the direct mapping.
> 
> If that sounds good to you, please confirm.

8GB limit seems good to me -- as long as it makes code significantly
simpler. It would be nice if it was <20 lines.
								Pavel
-- 
if you have sharp zaurus hardware you don't need... you know my address

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-27 21:08       ` Pavel Machek
@ 2005-09-27 23:26         ` Rafael J. Wysocki
  2005-09-27 23:43           ` Pavel Machek
  0 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-27 23:26 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andi Kleen, Andrew Morton

Hi,

On Tuesday, 27 of September 2005 23:08, Pavel Machek wrote:
> Hi!
> 
]-- snip --[ 
> > > > The following patch fixes Bug #4959.  For this purpose it creates temporary
> > > > page translation tables including the kernel mapping (reused) and the direct
> > > > mapping (created from scratch) and makes swsusp switch to these tables
> > > > right before the image is restored.
> > > 
> > > Why do you need *two* mappings? Should not just kernel mapping be enough?
> > 
> > The kernel mapping is for the kernel text.  The direct mapping maps the physical
> > RAM linearly to the set of virtual addresses starting at
> > PAGE_OFFSET.
> 
> Could not you just add phys_to_virt at some strategic place? 

No, I need both, but the original kernel mapping is safe, so I don't care (I just point to
it from the temporary PGD).

> > > Why? Reserve ten pages for them... static char resume_page_tables[10*PAGE_SIZE] does not
> > > sound that bad.
> > 
> > That will allow us to suspend if there's no more that 8GB of RAM in the box.
> > Is it acceptable?
> 
> I'd say so.
> 
> > > > Moreover, such a code would have to be executed on every boot and the
> > > > temporary page tables would always be present in memory.
> > > 
> > > Yep, but I do not see that as a big problem.
> > 
> > OK
> > 
> > I can reserve the static buffer (10 pages) in suspend.c and mark it as nosave.
> > The code that creates the mappings can stay in suspend.c either except it
> > won't need to call get_usable_page() and free_eaten_memory() any more
> > (__next_page() can be changed to get pages from the static buffer instead
> > of allocating them).  The code can also be simplified a bit, as we assume that
> > there will be only one PGD entry in the direct mapping.
> > 
> > If that sounds good to you, please confirm.
> 
> 8GB limit seems good to me -- as long as it makes code significantly
> simpler. It would be nice if it was <20 lines.

It is more than that, but it seems to be quite simple anyway.

The new patch follows.

Greetings,
Rafael


Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend.c	2005-09-28 00:47:43.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c	2005-09-28 01:05:08.000000000 +0200
@@ -11,6 +11,21 @@
 #include <linux/smp.h>
 #include <linux/suspend.h>
 #include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#define MAX_RESUME_PUD_ENTRIES	8
+#define MAX_RESUME_RAM_SIZE	(MAX_RESUME_PUD_ENTRIES * PTRS_PER_PMD * PMD_SIZE)
+
+int arch_prepare_suspend(void)
+{
+	if (MAX_RESUME_RAM_SIZE < (end_pfn << PAGE_SHIFT)) {
+		printk(KERN_ERR "Too much RAM for suspend (%lu), max. allowed: %lu",
+			end_pfn << PAGE_SHIFT, MAX_RESUME_RAM_SIZE);
+		return -ENOMEM;
+	}
+	return 0;
+}
 
 struct saved_context saved_context;
 
@@ -140,4 +155,62 @@
 
 }
 
+/* Defined in arch/x86_64/kernel/suspend_asm.S */
+int restore_image(void);
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
 
+pgd_t resume_level4_pgt[PTRS_PER_PGD] __nosavedata;
+pud_t resume_level3_pgt[PTRS_PER_PUD] __nosavedata;
+pmd_t resume_level2_pgt[MAX_RESUME_PUD_ENTRIES*PTRS_PER_PMD] __nosavedata;
+
+static void phys_pud_init(pud_t *pud, unsigned long end)
+{
+	long i, j;
+	pmd_t *pmd = resume_level2_pgt;
+
+	for (i = 0; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr;
+
+		paddr = i*PUD_SIZE;
+		if (paddr >= end) {
+			for (; i < PTRS_PER_PUD; i++, pud++)
+				set_pud(pud, __pud(0));
+			break;
+		}
+
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+			unsigned long pe;
+
+			if (paddr >= end) {
+				for (; j < PTRS_PER_PMD; j++, pmd++)
+					set_pmd(pmd,  __pmd(0));
+				break;
+			}
+			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
+			pe &= __supported_pte_mask;
+			set_pmd(pmd, __pmd(pe));
+		}
+	}
+}
+
+static void set_up_temporary_mappings(void)
+{
+	/* It is safe to reuse the original kernel mapping */
+	set_pgd(resume_level4_pgt + pgd_index(__START_KERNEL_map),
+		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+	/* Set up the direct mapping from scratch */
+	phys_pud_init(resume_level3_pgt, end_pfn << PAGE_SHIFT);
+	set_pgd(resume_level4_pgt + pgd_index(PAGE_OFFSET),
+		mk_kernel_pgd(__pa(resume_level3_pgt)));
+}
+
+int swsusp_arch_resume(void)
+{
+	set_up_temporary_mappings();
+	restore_image();
+	return 0;
+}
Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend_asm.S	2005-09-28 00:47:43.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S	2005-09-28 00:48:09.000000000 +0200
@@ -39,12 +39,12 @@
 	call swsusp_save
 	ret
 
-ENTRY(swsusp_arch_resume)
-	/* set up cr3 */	
-	leaq	init_level4_pgt(%rip),%rax
-	subq	$__START_KERNEL_map,%rax
-	movq	%rax,%cr3
-
+ENTRY(restore_image)
+	/* switch to temporary page tables */
+	leaq	resume_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
+	/* Flush TLB */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
 	andq	$~(1<<7), %rdx	# PGE
@@ -69,6 +69,10 @@
 	movq	pbe_next(%rdx), %rdx
 	jmp	loop
 done:
+	/* go back to the original page tables */
+	leaq	init_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
 	/* Flush TLB, including "global" things (vmalloc) */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
Index: linux-2.6.14-rc2-git6/include/asm-x86_64/suspend.h
===================================================================
--- linux-2.6.14-rc2-git6.orig/include/asm-x86_64/suspend.h	2005-09-28 00:47:43.000000000 +0200
+++ linux-2.6.14-rc2-git6/include/asm-x86_64/suspend.h	2005-09-28 00:48:09.000000000 +0200
@@ -6,11 +6,7 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 
-static inline int
-arch_prepare_suspend(void)
-{
-	return 0;
-}
+extern int arch_prepare_suspend(void);
 
 /* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
 struct saved_context {

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-27 23:26         ` Rafael J. Wysocki
@ 2005-09-27 23:43           ` Pavel Machek
  2005-09-28  8:25             ` Rafael J. Wysocki
  0 siblings, 1 reply; 12+ messages in thread
From: Pavel Machek @ 2005-09-27 23:43 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: LKML, Andi Kleen, Andrew Morton

Hi!

> > > I can reserve the static buffer (10 pages) in suspend.c and mark it as nosave.
> > > The code that creates the mappings can stay in suspend.c either except it
> > > won't need to call get_usable_page() and free_eaten_memory() any more
> > > (__next_page() can be changed to get pages from the static buffer instead
> > > of allocating them).  The code can also be simplified a bit, as we assume that
> > > there will be only one PGD entry in the direct mapping.
> > > 
> > > If that sounds good to you, please confirm.
> > 
> > 8GB limit seems good to me -- as long as it makes code significantly
> > simpler. It would be nice if it was <20 lines.
> 
> It is more than that, but it seems to be quite simple anyway.
> 
> The new patch follows.

Thanks, seems okay to me. 

								Pavel

-- 
if you have sharp zaurus hardware you don't need... you know my address

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][Fix] Fix Bug #4959 (take 2)
  2005-09-27 23:43           ` Pavel Machek
@ 2005-09-28  8:25             ` Rafael J. Wysocki
  0 siblings, 0 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-28  8:25 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andi Kleen, Andrew Morton

Hi,

On Wednesday, 28 of September 2005 01:43, Pavel Machek wrote:
> Hi!
> 
> > > > I can reserve the static buffer (10 pages) in suspend.c and mark it as nosave.
> > > > The code that creates the mappings can stay in suspend.c either except it
> > > > won't need to call get_usable_page() and free_eaten_memory() any more
> > > > (__next_page() can be changed to get pages from the static buffer instead
> > > > of allocating them).  The code can also be simplified a bit, as we assume that
> > > > there will be only one PGD entry in the direct mapping.
> > > > 
> > > > If that sounds good to you, please confirm.
> > > 
> > > 8GB limit seems good to me -- as long as it makes code significantly
> > > simpler. It would be nice if it was <20 lines.
> > 
> > It is more than that, but it seems to be quite simple anyway.
> > 
> > The new patch follows.
> 
> Thanks, seems okay to me.

Thanks for reviewing.

If you don't mind, I'll change arch_prepare_suspend() a bit so that the
RAM sizes are printed in KB and repost the patch with a changelog.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH][Fix] Fix Bug #4959 (take 3)
  2005-09-24 17:36 [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64 Rafael J. Wysocki
  2005-09-25 22:07 ` Pavel Machek
  2005-09-27  8:07 ` [PATCH][Fix] Fix Bug #4959 (take 2) Rafael J. Wysocki
@ 2005-09-28 10:29 ` Rafael J. Wysocki
  2 siblings, 0 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2005-09-28 10:29 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andi Kleen, Andrew Morton

Hi,

The following patch fixes Bug #4959.  For this purpose it creates temporary
page translation tables including the kernel mapping (reused) and the direct
mapping (created from scratch) and makes swsusp switch to these tables
right before the image is restored.

The code that generates the direct mapping is based on the code in
arch/x86_64/mm/init.c.

Please consider for applying.

Greetings,
Rafael


Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend.c	2005-09-28 01:17:23.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c	2005-09-28 10:28:05.000000000 +0200
@@ -11,6 +11,21 @@
 #include <linux/smp.h>
 #include <linux/suspend.h>
 #include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#define MAX_RESUME_PUD_ENTRIES	8
+#define MAX_RESUME_RAM_SIZE	(MAX_RESUME_PUD_ENTRIES * PTRS_PER_PMD * PMD_SIZE)
+
+int arch_prepare_suspend(void)
+{
+	if (MAX_RESUME_RAM_SIZE < (end_pfn << PAGE_SHIFT)) {
+		printk(KERN_ERR "Too much RAM for suspend (%lu K), max. allowed: %lu K",
+			end_pfn << (PAGE_SHIFT - 10), MAX_RESUME_RAM_SIZE >> 10);
+		return -ENOMEM;
+	}
+	return 0;
+}
 
 struct saved_context saved_context;
 
@@ -140,4 +155,62 @@
 
 }
 
+/* Defined in arch/x86_64/kernel/suspend_asm.S */
+int restore_image(void);
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
 
+pgd_t resume_level4_pgt[PTRS_PER_PGD] __nosavedata;
+pud_t resume_level3_pgt[PTRS_PER_PUD] __nosavedata;
+pmd_t resume_level2_pgt[MAX_RESUME_PUD_ENTRIES*PTRS_PER_PMD] __nosavedata;
+
+static void phys_pud_init(pud_t *pud, unsigned long end)
+{
+	long i, j;
+	pmd_t *pmd = resume_level2_pgt;
+
+	for (i = 0; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr;
+
+		paddr = i*PUD_SIZE;
+		if (paddr >= end) {
+			for (; i < PTRS_PER_PUD; i++, pud++)
+				set_pud(pud, __pud(0));
+			break;
+		}
+
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+			unsigned long pe;
+
+			if (paddr >= end) {
+				for (; j < PTRS_PER_PMD; j++, pmd++)
+					set_pmd(pmd,  __pmd(0));
+				break;
+			}
+			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
+			pe &= __supported_pte_mask;
+			set_pmd(pmd, __pmd(pe));
+		}
+	}
+}
+
+static void set_up_temporary_mappings(void)
+{
+	/* It is safe to reuse the original kernel mapping */
+	set_pgd(resume_level4_pgt + pgd_index(__START_KERNEL_map),
+		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+	/* Set up the direct mapping from scratch */
+	phys_pud_init(resume_level3_pgt, end_pfn << PAGE_SHIFT);
+	set_pgd(resume_level4_pgt + pgd_index(PAGE_OFFSET),
+		mk_kernel_pgd(__pa(resume_level3_pgt)));
+}
+
+int swsusp_arch_resume(void)
+{
+	set_up_temporary_mappings();
+	restore_image();
+	return 0;
+}
Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend_asm.S	2005-09-28 01:17:23.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S	2005-09-28 01:18:12.000000000 +0200
@@ -39,12 +39,12 @@
 	call swsusp_save
 	ret
 
-ENTRY(swsusp_arch_resume)
-	/* set up cr3 */	
-	leaq	init_level4_pgt(%rip),%rax
-	subq	$__START_KERNEL_map,%rax
-	movq	%rax,%cr3
-
+ENTRY(restore_image)
+	/* switch to temporary page tables */
+	leaq	resume_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
+	/* Flush TLB */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
 	andq	$~(1<<7), %rdx	# PGE
@@ -69,6 +69,10 @@
 	movq	pbe_next(%rdx), %rdx
 	jmp	loop
 done:
+	/* go back to the original page tables */
+	leaq	init_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
 	/* Flush TLB, including "global" things (vmalloc) */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
Index: linux-2.6.14-rc2-git6/include/asm-x86_64/suspend.h
===================================================================
--- linux-2.6.14-rc2-git6.orig/include/asm-x86_64/suspend.h	2005-08-29 01:41:01.000000000 +0200
+++ linux-2.6.14-rc2-git6/include/asm-x86_64/suspend.h	2005-09-28 01:18:12.000000000 +0200
@@ -6,11 +6,7 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 
-static inline int
-arch_prepare_suspend(void)
-{
-	return 0;
-}
+extern int arch_prepare_suspend(void);
 
 /* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
 struct saved_context {

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2005-09-28  8:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-24 17:36 [PATCH][Fix] Prevent swsusp from corrupting page translation tables during resume on x86-64 Rafael J. Wysocki
2005-09-25 22:07 ` Pavel Machek
2005-09-26 10:46   ` Rafael J. Wysocki
2005-09-26 10:49     ` Pavel Machek
2005-09-27  8:07 ` [PATCH][Fix] Fix Bug #4959 (take 2) Rafael J. Wysocki
2005-09-27 13:32   ` Pavel Machek
2005-09-27 21:00     ` Rafael J. Wysocki
2005-09-27 21:08       ` Pavel Machek
2005-09-27 23:26         ` Rafael J. Wysocki
2005-09-27 23:43           ` Pavel Machek
2005-09-28  8:25             ` Rafael J. Wysocki
2005-09-28 10:29 ` [PATCH][Fix] Fix Bug #4959 (take 3) Rafael J. Wysocki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).