* [PATCH] x86/head: Refactor 32-bit pgtable setup
@ 2016-12-08 16:44 Boris Ostrovsky
From: Boris Ostrovsky @ 2016-12-08 16:44 UTC (permalink / raw)
  To: tglx, mingo, hpa; +Cc: x86, linux-kernel, xen-devel, matt, Boris Ostrovsky

The new Xen PVH entry point requires page tables to be setup by the
kernel since it is entered with paging disabled.

Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
invoked from both the new Xen entry point and the existing startup_32
code.

Convert resulting common code to C.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
This is a replacement for https://lkml.org/lkml/2016/10/14/434, with
assembly code re-written in C as requested by Ingo.


 arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
 arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
 arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
 3 files changed, 101 insertions(+), 115 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b6c0b40..fbc7336 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ static inline void check_pgt_cache(void) { }
 #define kern_addr_valid(kaddr)	(0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index f16c55b..e5fb436 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
 	start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+	pte_t pte, *ptep;
+	int i;
+	unsigned long *ptr;
+	/* Enough space to fit pagetables for the low memory linear map */
+	const unsigned long limit = __pa(_end) +
+		(PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+	pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val)    { (pl2).pmd = (val); }
+#else
+	pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)   { (pl2).pgd = (val); }
+#endif
+
+	ptep = (pte_t *)__pa(__brk_base);
+	pte.pte = PTE_IDENT_ATTR;
+
+	while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+		SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+		*pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+		/* Kernel PDE entry */
+		*(pl2p +  ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			*ptep = pte;
+			pte.pte += PAGE_SIZE;
+			ptep++;
+		}
+
+		pl2p++;
+	}
+
+	ptr = (unsigned long *)__pa(&max_pfn_mapped);
+	/* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+	*ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+	ptr = (unsigned long *)__pa(&_brk_end);
+	*ptr = (unsigned long)ptep + PAGE_OFFSET;
+}
+
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2dabea4..dc6b030 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -24,6 +24,7 @@
 #include <asm/nops.h>
 #include <asm/bootparam.h>
 #include <asm/export.h>
+#include <asm/pgtable_32.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -42,41 +43,6 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
-
-/*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
-/*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
  * to map all of lowmem.
@@ -158,90 +124,15 @@ ENTRY(startup_32)
 	call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-	/*
-	 * In PAE mode initial_page_table is statically defined to contain
-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-	 * entries). The identity mapping is handled by pointing two PGD entries
-	 * to the first kernel PMD.
-	 *
-	 * Note the upper half of each PMD or PTE are always zero at this stage.
-	 */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-	xorl %ebx,%ebx				/* %ebx is kept at zero */
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_pg_pmd), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
-	movl %ecx,(%edx)			/* Store PMD entry */
-						/* Upper half already zero */
-	addl $8,%edx
-	movl $512,%ecx
-11:
-	stosl
-	xchgl %eax,%ebx
-	stosl
-	xchgl %eax,%ebx
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-1:
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
+	/* Create early pagetables. */
+	call  mk_early_pgtbl_32
 
 	/* Do early initialization of the fixmap area */
 	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#ifdef  CONFIG_X86_PAE
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
 	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else	/* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_page_table), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#else
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
 
@@ -662,6 +553,7 @@ ENTRY(setup_once_ref)
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.globl initial_pg_pmd
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else
-- 
1.8.3.1
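
As a concrete check of the sizing comments in the patch, the arithmetic can
be worked through for the common VMSPLIT_3G layout. The following is a
standalone user-space sketch, not kernel code; the PAGE_OFFSET value of
0xC0000000 and the 4 KiB page size are assumptions matching that
configuration:

#include <stdio.h>

/* Assumed VMSPLIT_3G geometry (not taken from kernel headers). */
#define PAGE_OFFSET	0xC0000000UL
#define PAGE_SHIFT	12

int main(void)
{
	/*
	 * 2^32 computed in 64 bits; this is the shift overflow that the
	 * kernel's "2<<31" trick sidesteps for 32-bit gas.
	 */
	unsigned long long lowmem_pages =
		(0x100000000ULL - PAGE_OFFSET) >> PAGE_SHIFT;

	/* Non-PAE: 1024 four-byte PTEs per page-table page. */
	unsigned long long nonpae_pt_pages = lowmem_pages / 1024;
	/* PAE: 512 eight-byte PTEs per page, plus 4 pages of PMDs. */
	unsigned long long pae_pt_pages = lowmem_pages / 512 + 4;

	printf("lowmem pages:     %llu\n", lowmem_pages);    /* 262144 */
	printf("non-PAE PT pages: %llu\n", nonpae_pt_pages); /*    256 */
	printf("PAE PT pages:     %llu\n", pae_pt_pages);    /*    516 */
	return 0;
}

For a 3G/1G split the non-PAE worst case therefore reserves 256 pages (1 MiB)
of page tables beyond _end, and the PAE case 516 pages, consistent with the
"each megabyte assigned here requires a kilobyte" note (256 four-byte PTEs
map 1 MiB). Note also that in the non-PAE path of mk_early_pgtbl_32() each
identity PGD entry is mirrored PAGE_OFFSET >> PGDIR_SHIFT slots higher
(index 768 for this split), which is how the identity and kernel mappings
come to share page tables.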


* Re: [PATCH] x86/head: Refactor 32-bit pgtable setup
  2016-12-08 16:44 [PATCH] x86/head: Refactor 32-bit pgtable setup Boris Ostrovsky
@ 2016-12-09  4:33 ` Ingo Molnar
  2016-12-09 14:33   ` Boris Ostrovsky
  2017-01-05  8:52 ` Ingo Molnar
  2017-01-06 11:00 ` [tip:x86/boot] " tip-bot for Boris Ostrovsky
From: Ingo Molnar @ 2016-12-09  4:33 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: tglx, mingo, hpa, x86, linux-kernel, xen-devel, matt,
	Linus Torvalds, Andy Lutomirski, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Denys Vlasenko, Brian Gerst


* Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:

> The new Xen PVH entry point requires page tables to be setup by the
> kernel since it is entered with paging disabled.
> 
> Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
> invoked from both the new Xen entry point and the existing startup_32
> code.
> 
> Convert resulting common code to C.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> This is replacement for https://lkml.org/lkml/2016/10/14/434, with
> assembly code re-written in C as requested by Ingo.
> 
> 
>  arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
>  arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
>  arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
>  3 files changed, 101 insertions(+), 115 deletions(-)

Whee, I love it! And the code is so much more readable!

Did you have any particular robustness problems (difficult to resolve crashes) 
while developing it, or was it reasonably straightforward to do?

Thanks,

	Ingo


* Re: [PATCH] x86/head: Refactor 32-bit pgtable setup
  2016-12-09  4:33 ` Ingo Molnar
@ 2016-12-09 14:33   ` Boris Ostrovsky
  2016-12-18  8:44     ` Ingo Molnar
From: Boris Ostrovsky @ 2016-12-09 14:33 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: tglx, mingo, hpa, x86, linux-kernel, xen-devel, matt,
	Linus Torvalds, Andy Lutomirski, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Denys Vlasenko, Brian Gerst

On 12/08/2016 11:33 PM, Ingo Molnar wrote:
> * Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>
>> The new Xen PVH entry point requires page tables to be setup by the
>> kernel since it is entered with paging disabled.
>>
>> Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
>> invoked from both the new Xen entry point and the existing startup_32
>> code.
>>
>> Convert resulting common code to C.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> ---
>> This is replacement for https://lkml.org/lkml/2016/10/14/434, with
>> assembly code re-written in C as requested by Ingo.
>>
>>
>>  arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
>>  arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
>>  arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
>>  3 files changed, 101 insertions(+), 115 deletions(-)
> Whee, I love it! And the code is so much more readable!
>
> Did you have any particular robustness problems (difficult to resolve crashes) 
> while developing it, or was it reasonably straightforward to do?

There was nothing particularly difficult beyond understanding the current
code. That, of course, is not to say that there were no crashes, but
developing this on a guest gives you pretty good insight into why/where
you crashed.

This was tested on bare metal (in case you are wondering), but obviously
more testing is always good.


-boris


* Re: [PATCH] x86/head: Refactor 32-bit pgtable setup
  2016-12-09 14:33   ` Boris Ostrovsky
@ 2016-12-18  8:44     ` Ingo Molnar
  2016-12-19 14:09       ` Boris Ostrovsky
From: Ingo Molnar @ 2016-12-18  8:44 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: tglx, mingo, hpa, x86, linux-kernel, xen-devel, matt,
	Linus Torvalds, Andy Lutomirski, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Denys Vlasenko, Brian Gerst


* Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:

> On 12/08/2016 11:33 PM, Ingo Molnar wrote:
> > * Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> >
> >> The new Xen PVH entry point requires page tables to be setup by the
> >> kernel since it is entered with paging disabled.
> >>
> >> Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
> >> invoked from both the new Xen entry point and the existing startup_32
> >> code.
> >>
> >> Convert resulting common code to C.
> >>
> >> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> >> ---
> >> This is replacement for https://lkml.org/lkml/2016/10/14/434, with
> >> assembly code re-written in C as requested by Ingo.
> >>
> >>
> >>  arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
> >>  arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
> >>  arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
> >>  3 files changed, 101 insertions(+), 115 deletions(-)
> > Whee, I love it! And the code is so much more readable!
> >
> > Did you have any particular robustness problems (difficult to resolve crashes) 
> > while developing it, or was it reasonably straightforward to do?
> 
> There was nothing particularly difficult beyond understanding current
> code. That, of course, is not to say that there were no crashes but
> developing this on a guest gives you pretty good insight into why/where
> you crashed.
> 
> This was tested on bare-metal (in case you are wondering), but obviously
> more testing is always good.

Ok, cool!

Would you like to carry this with your other Xen dependencies? If yes:

   Acked-by: Ingo Molnar <mingo@kernel.org>

If not then I can pick it up and get it to Linus in v4.10.

Thanks,

	Ingo


* Re: [PATCH] x86/head: Refactor 32-bit pgtable setup
  2016-12-18  8:44     ` Ingo Molnar
@ 2016-12-19 14:09       ` Boris Ostrovsky
From: Boris Ostrovsky @ 2016-12-19 14:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: tglx, mingo, hpa, x86, linux-kernel, xen-devel, matt,
	Linus Torvalds, Andy Lutomirski, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Denys Vlasenko, Brian Gerst

On 12/18/2016 03:44 AM, Ingo Molnar wrote:
> * Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>
>> On 12/08/2016 11:33 PM, Ingo Molnar wrote:
>>> * Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>>>
>>>> The new Xen PVH entry point requires page tables to be setup by the
>>>> kernel since it is entered with paging disabled.
>>>>
>>>> Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
>>>> invoked from both the new Xen entry point and the existing startup_32
>>>> code.
>>>>
>>>> Convert resulting common code to C.
>>>>
>>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>>>> ---
>>>> This is replacement for https://lkml.org/lkml/2016/10/14/434, with
>>>> assembly code re-written in C as requested by Ingo.
>>>>
>>>>
>>>>  arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
>>>>  arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
>>>>  arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
>>>>  3 files changed, 101 insertions(+), 115 deletions(-)
>>> Whee, I love it! And the code is so much more readable!
>>>
>>> Did you have any particular robustness problems (difficult to resolve crashes) 
>>> while developing it, or was it reasonably straightforward to do?
>> There was nothing particularly difficult beyond understanding current
>> code. That, of course, is not to say that there were no crashes but
>> developing this on a guest gives you pretty good insight into why/where
>> you crashed.
>>
>> This was tested on bare-metal (in case you are wondering), but obviously
>> more testing is always good.
> Ok, cool!
>
> Would you like to carry this with your other Xen dependencies? If yes:
>
>    Acked-by: Ingo Molnar <mingo@kernel.org>
>
> If not then I can pick it up and get it to Linus in v4.10.


I don't think my series will get into 4.10 since it has a dependency
on hypervisor code that is still being reviewed.

If you could take it via your tree it would be great. Thanks!

-boris


* Re: [PATCH] x86/head: Refactor 32-bit pgtable setup
  2016-12-08 16:44 [PATCH] x86/head: Refactor 32-bit pgtable setup Boris Ostrovsky
  2016-12-09  4:33 ` Ingo Molnar
@ 2017-01-05  8:52 ` Ingo Molnar
  2017-01-05 14:17   ` Boris Ostrovsky
  2017-01-05 20:12   ` [PATCH] x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C kbuild test robot
  2017-01-06 11:00 ` [tip:x86/boot] " tip-bot for Boris Ostrovsky
From: Ingo Molnar @ 2017-01-05  8:52 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: tglx, mingo, hpa, x86, linux-kernel, xen-devel, matt


* Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:

> The new Xen PVH entry point requires page tables to be setup by the
> kernel since it is entered with paging disabled.
> 
> Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
> invoked from both the new Xen entry point and the existing startup_32
> code.
> 
> Convert resulting common code to C.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> This is replacement for https://lkml.org/lkml/2016/10/14/434, with
> assembly code re-written in C as requested by Ingo.
> 
> 
>  arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
>  arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
>  arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
>  3 files changed, 101 insertions(+), 115 deletions(-)

Yeah, so (belatedly) I tried to merge this into the latest upstream, via the
commit below (note the slight edits to the changelog) - but the 32-bit
defconfig fails to build:

  arch/x86/kernel/head_32.S:615: Error: can't resolve `init_thread_union' {*UND* section} - `SIZEOF_PTREGS' {*UND* section}

Thanks,

	Ingo

=======================>
From 071878c634fda7d9be3f015f05a89f5468e7e2e4 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Thu, 8 Dec 2016 11:44:31 -0500
Subject: [PATCH] x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C

The new Xen PVH entry point requires page tables to be set up by the
kernel since it is entered with paging disabled.

Pull the common code out of head_32.S so that mk_early_pgtbl_32() can be
invoked from both the new Xen entry point and the existing startup_32()
code.

Convert resulting common code to C.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: matt@codeblueprint.co.uk
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1481215471-9639-1-git-send-email-boris.ostrovsky@oracle.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
 arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
 arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
 3 files changed, 101 insertions(+), 115 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b6c0b404898a..fbc73360aea0 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@ struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ do {						\
 #define kern_addr_valid(kaddr)	(0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index f16c55bfc090..e5fb436a6548 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
 	start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+	pte_t pte, *ptep;
+	int i;
+	unsigned long *ptr;
+	/* Enough space to fit pagetables for the low memory linear map */
+	const unsigned long limit = __pa(_end) +
+		(PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+	pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val)    { (pl2).pmd = (val); }
+#else
+	pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)   { (pl2).pgd = (val); }
+#endif
+
+	ptep = (pte_t *)__pa(__brk_base);
+	pte.pte = PTE_IDENT_ATTR;
+
+	while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+		SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+		*pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+		/* Kernel PDE entry */
+		*(pl2p +  ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			*ptep = pte;
+			pte.pte += PAGE_SIZE;
+			ptep++;
+		}
+
+		pl2p++;
+	}
+
+	ptr = (unsigned long *)__pa(&max_pfn_mapped);
+	/* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+	*ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+	ptr = (unsigned long *)__pa(&_brk_end);
+	*ptr = (unsigned long)ptep + PAGE_OFFSET;
+}
+
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 4e8577d03372..eca1c93c38e8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -24,6 +24,7 @@
 #include <asm/nops.h>
 #include <asm/bootparam.h>
 #include <asm/export.h>
+#include <asm/pgtable_32.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -42,43 +43,8 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
-
 #define SIZEOF_PTREGS 17*4
 
-/*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
-/*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
  * to map all of lowmem.
@@ -160,90 +126,15 @@ ENTRY(startup_32)
 	call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-	/*
-	 * In PAE mode initial_page_table is statically defined to contain
-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-	 * entries). The identity mapping is handled by pointing two PGD entries
-	 * to the first kernel PMD.
-	 *
-	 * Note the upper half of each PMD or PTE are always zero at this stage.
-	 */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-	xorl %ebx,%ebx				/* %ebx is kept at zero */
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_pg_pmd), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
-	movl %ecx,(%edx)			/* Store PMD entry */
-						/* Upper half already zero */
-	addl $8,%edx
-	movl $512,%ecx
-11:
-	stosl
-	xchgl %eax,%ebx
-	stosl
-	xchgl %eax,%ebx
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-1:
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
+	/* Create early pagetables. */
+	call  mk_early_pgtbl_32
 
 	/* Do early initialization of the fixmap area */
 	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#ifdef  CONFIG_X86_PAE
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
 	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else	/* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_page_table), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#else
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
 
@@ -666,6 +557,7 @@ ENTRY(setup_once_ref)
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.globl initial_pg_pmd
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else


* Re: [PATCH] x86/head: Refactor 32-bit pgtable setup
  2017-01-05  8:52 ` Ingo Molnar
@ 2017-01-05 14:17   ` Boris Ostrovsky
  2017-01-05 20:12   ` [PATCH] x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C kbuild test robot
From: Boris Ostrovsky @ 2017-01-05 14:17 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: tglx, mingo, hpa, x86, linux-kernel, xen-devel, matt

On 01/05/2017 03:52 AM, Ingo Molnar wrote:
>
> Yeah, so (belatedly) I tried to merge this to latest upstream, via the commit 
> below (note the slight edits to the changelog) - but 32-bit defconfig fails to 
> build:
>
>   arch/x86/kernel/head_32.S:615: Error: can't resolve `init_thread_union' {*UND* section} - `SIZEOF_PTREGS' {*UND* section}

When you dropped MAPPING_BEYOND_END (which I indeed should have removed)
you left the comment opening in the wrong place:

> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index 4e8577d03372..eca1c93c38e8 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -24,6 +24,7 @@
>  #include <asm/nops.h>
>  #include <asm/bootparam.h>
>  #include <asm/export.h>
> +#include <asm/pgtable_32.h>
>  
>  /* Physical address */
>  #define pa(X) ((X) - __PAGE_OFFSET)
> @@ -42,43 +43,8 @@
>  #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id

This one:

>  
>  /*
> - * This is how much memory in addition to the memory covered up to
> - * and including _end we need mapped initially.
> - * We need:
> - *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
> - *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
> - *
> - * Modulo rounding, each megabyte assigned here requires a kilobyte of
> - * memory, which is currently unreclaimed.
> - *
> - * This should be a multiple of a page.
> - *
> - * KERNEL_IMAGE_SIZE should be greater than pa(_end)
> - * and small than max_low_pfn, otherwise will waste some page table entries
> - */
> -
> -#if PTRS_PER_PMD > 1
> -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
> -#else
> -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
> -#endif
> -
>  #define SIZEOF_PTREGS 17*4
>  



In other words, this version of the patch needs:

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index eca1c93..1f85ee8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -42,9 +42,10 @@
 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
 
-/*
+
 #define SIZEOF_PTREGS 17*4
 
+/*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
  * to map all of lowmem.
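
The failure is mechanical: the stray "/*" turns everything up to the next
"*/" into a single comment, so the SIZEOF_PTREGS definition is swallowed and
the name reaches gas as an undefined symbol rather than a macro. A minimal
sketch of the broken intermediate state (an illustration, not the actual
file contents):

/*                            <- stray opener left behind
#define SIZEOF_PTREGS 17*4       (swallowed: still inside the comment)
 * Worst-case size of the kernel mapping we need to make:
 * ...
 */

	/* Later, with SIZEOF_PTREGS undefined, this reference fails: */
	.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
	      TOP_OF_KERNEL_STACK_PADDING;

hence the "can't resolve `init_thread_union' {*UND* section} -
`SIZEOF_PTREGS' {*UND* section}" error.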



-boris


* Re: [PATCH] x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C
  2017-01-05  8:52 ` Ingo Molnar
  2017-01-05 14:17   ` Boris Ostrovsky
@ 2017-01-05 20:12   ` kbuild test robot
From: kbuild test robot @ 2017-01-05 20:12 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: kbuild-all, Boris Ostrovsky, tglx, mingo, hpa, x86, linux-kernel,
	xen-devel, matt


Hi Ingo,

[auto build test ERROR on tip/auto-latest]
[also build test ERROR on v4.10-rc2 next-20170105]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Ingo-Molnar/x86-boot-32-Convert-the-32-bit-pgtable-setup-code-from-assembly-to-C/20170106-035845
config: i386-tinyconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   arch/x86/kernel/head_32.S: Assembler messages:
>> arch/x86/kernel/head_32.S:612: Error: can't resolve `init_thread_union' {*UND* section} - `SIZEOF_PTREGS' {*UND* section}

vim +612 arch/x86/kernel/head_32.S

11d4c3f9 arch/x86/kernel/head_32.S H. Peter Anvin 2011-02-04  606  .balign 4
b32f96c7 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-08-18  607  ENTRY(initial_stack)
22dc3918 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-09-21  608  	/*
22dc3918 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-09-21  609  	 * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
22dc3918 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-09-21  610  	 * unwinder reliably detect the end of the stack.
22dc3918 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-09-21  611  	 */
22dc3918 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-09-21 @612  	.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
22dc3918 arch/x86/kernel/head_32.S Josh Poimboeuf 2016-09-21  613  	      TOP_OF_KERNEL_STACK_PADDING;
^1da177e arch/i386/kernel/head.S   Linus Torvalds 2005-04-16  614  
4c5023a3 arch/x86/kernel/head_32.S H. Peter Anvin 2012-04-18  615  __INITRODATA

:::::: The code at line 612 was first introduced by commit
:::::: 22dc391865af29a1332bd1d17152f2ca7188bc4a x86/boot: Fix the end of the stack for idle tasks

:::::: TO: Josh Poimboeuf <jpoimboe@redhat.com>
:::::: CC: Ingo Molnar <mingo@kernel.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation



* [tip:x86/boot] x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C
  2016-12-08 16:44 [PATCH] x86/head: Refactor 32-bit pgtable setup Boris Ostrovsky
  2016-12-09  4:33 ` Ingo Molnar
  2017-01-05  8:52 ` Ingo Molnar
@ 2017-01-06 11:00 ` tip-bot for Boris Ostrovsky
From: tip-bot for Boris Ostrovsky @ 2017-01-06 11:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, luto, peterz, dvlasenk, brgerst, jpoimboe, mingo, torvalds,
	bp, linux-kernel, hpa, boris.ostrovsky

Commit-ID:  1e620f9b23e598ab936ece12233e98e97930b692
Gitweb:     http://git.kernel.org/tip/1e620f9b23e598ab936ece12233e98e97930b692
Author:     Boris Ostrovsky <boris.ostrovsky@oracle.com>
AuthorDate: Thu, 8 Dec 2016 11:44:31 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 6 Jan 2017 08:39:26 +0100

x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C

The new Xen PVH entry point requires page tables to be setup by the
kernel since it is entered with paging disabled.

Pull the common code out of head_32.S so that mk_early_pgtbl_32() can be
invoked from both the new Xen entry point and the existing startup_32()
code.

Convert resulting common code to C.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: matt@codeblueprint.co.uk
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1481215471-9639-1-git-send-email-boris.ostrovsky@oracle.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
 arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
 arch/x86/kernel/head_32.S         | 121 +++-----------------------------------
 3 files changed, 101 insertions(+), 114 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b6c0b40..fbc7336 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@ struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ do {						\
 #define kern_addr_valid(kaddr)	(0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index f16c55b..e5fb436 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
 	start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+	pte_t pte, *ptep;
+	int i;
+	unsigned long *ptr;
+	/* Enough space to fit pagetables for the low memory linear map */
+	const unsigned long limit = __pa(_end) +
+		(PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+	pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val)    { (pl2).pmd = (val); }
+#else
+	pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)   { (pl2).pgd = (val); }
+#endif
+
+	ptep = (pte_t *)__pa(__brk_base);
+	pte.pte = PTE_IDENT_ATTR;
+
+	while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+		SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+		*pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+		/* Kernel PDE entry */
+		*(pl2p +  ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			*ptep = pte;
+			pte.pte += PAGE_SIZE;
+			ptep++;
+		}
+
+		pl2p++;
+	}
+
+	ptr = (unsigned long *)__pa(&max_pfn_mapped);
+	/* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+	*ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+	ptr = (unsigned long *)__pa(&_brk_end);
+	*ptr = (unsigned long)ptep + PAGE_OFFSET;
+}
+
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 4e8577d..1f85ee8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -24,6 +24,7 @@
 #include <asm/nops.h>
 #include <asm/bootparam.h>
 #include <asm/export.h>
+#include <asm/pgtable_32.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -41,44 +42,10 @@
 #define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
-/*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
 
 #define SIZEOF_PTREGS 17*4
 
 /*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
-/*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
  * to map all of lowmem.
@@ -160,90 +127,15 @@ ENTRY(startup_32)
 	call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-	/*
-	 * In PAE mode initial_page_table is statically defined to contain
-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-	 * entries). The identity mapping is handled by pointing two PGD entries
-	 * to the first kernel PMD.
-	 *
-	 * Note the upper half of each PMD or PTE are always zero at this stage.
-	 */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-	xorl %ebx,%ebx				/* %ebx is kept at zero */
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_pg_pmd), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
-	movl %ecx,(%edx)			/* Store PMD entry */
-						/* Upper half already zero */
-	addl $8,%edx
-	movl $512,%ecx
-11:
-	stosl
-	xchgl %eax,%ebx
-	stosl
-	xchgl %eax,%ebx
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-1:
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
+	/* Create early pagetables. */
+	call  mk_early_pgtbl_32
 
 	/* Do early initialization of the fixmap area */
 	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#ifdef  CONFIG_X86_PAE
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
 	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else	/* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_page_table), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#else
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
 
@@ -666,6 +558,7 @@ ENTRY(setup_once_ref)
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.globl initial_pg_pmd
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else

