* "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-01 17:21 UTC
  To: Yinghai Lu; +Cc: linux-kernel, Konrad Rzeszutek Wilk, Jeremy Fitzhardinge

Yinghai Lu,
while testing tip/x86/mm on Xen I found out that the commit "x86-64, mm:
Put early page table high" reliably crashes Linux at boot.
The reason is well explained by the commit message of
fef5ba797991f9335bcfc295942b684f9bf613a1:

"Xen requires that all pages containing pagetable entries to be mapped
read-only.  If pages used for the initial pagetable are already mapped
then we can change the mapping to RO.  However, if they are initially
unmapped, we need to make sure that when they are later mapped, they
are also mapped RO.

We do this by knowing that the kernel pagetable memory is pre-allocated
in the range e820_table_start - e820_table_end, so any pfn within this
range should be mapped read-only.  However, the pagetable setup code
early_ioremaps the pages to write their entries, so we must make sure
that mappings created in the early_ioremap fixmap area are mapped RW.
(Those mappings are removed before the pages are presented to Xen
as pagetable pages.)"

In other words, mask_rw_pte (called by xen_set_pte_init) should mark RO
the already existing pagetable pages (like the ones belonging to the
initial mappings), while it should mark RW the new pages not yet hooked
into the pagetable.  This is what the following lines used to achieve,
but don't anymore:

	/*
	 * If the new pfn is within the range of the newly allocated
	 * kernel pagetable, and it isn't being mapped into an
	 * early_ioremap fixmap slot, make sure it is RO.
	 */
	if (!is_early_ioremap_ptep(ptep) &&
	    pfn >= pgt_buf_start && pfn < pgt_buf_end)
		pte = pte_wrprotect(pte);
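
For context, these lines live in mask_rw_pte, which Xen installs as its
early set_pte hook; a sketch of the call path, where the exact body is
an assumption beyond what this thread states:

---
/* Sketch only: the thread confirms that xen_set_pte_init calls
 * mask_rw_pte; the body below is assumed for illustration. */
static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
{
	pte = mask_rw_pte(ptep, pte);	/* decide RO vs RW for this pfn */
	xen_set_pte(ptep, pte);
}
---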

Unfortunately we now map the already existing initial pagetable pages a
second time (via map_low_page) as well as the new zeroed pages through
the early_ioremap fixmap area, so we are unable to distinguish between
the two.
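
To make the failure mode concrete, here is a minimal standalone
simulation of the old check (not from the thread; the pfn and buffer
values are taken from the boot log quoted further down, the function
name is a stand-in):

---
#include <stdbool.h>
#include <stdio.h>

/* Pagetable buffer bounds taken from the boot log below:
 * "kernel direct mapping tables up to cffc2000 @ cf93d000-cffc2000". */
static unsigned long pgt_buf_start = 0xcf93d, pgt_buf_end = 0xcf966;

/* The old predicate: write-protect only when the mapping is NOT an
 * early_ioremap fixmap slot and the pfn lies inside the buffer. */
static bool old_check_wrprotects(bool early_ioremap_slot, unsigned long pfn)
{
	return !early_ioremap_slot &&
	       pfn >= pgt_buf_start && pfn < pgt_buf_end;
}

int main(void)
{
	/* An initial pagetable page (pfn 0x2004 in the log) re-mapped a
	 * second time through map_low_page now arrives via an
	 * early_ioremap slot, so the old check leaves the alias RW --
	 * but Xen requires RO, hence the crash. */
	printf("initial pt page via map_low_page: RO=%d (Xen needs 1)\n",
	       old_check_wrprotects(true, 0x2004));

	/* A freshly allocated, not-yet-hooked page from alloc_low_page
	 * is correctly left RW so its entries can be written. */
	printf("fresh page via alloc_low_page:    RO=%d (must be 0)\n",
	       old_check_wrprotects(true, pgt_buf_end - 1));
	return 0;
}
---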

Can we go back to the previous way of accessing pagetable pages from
kernel_physical_mapping_init, while keeping the new pagetable allocation
strategy? It seems to me that the introduction of map_low_page is not
actually required, is it? In that case we could just revert that bit...
(appended partial revert example).
Otherwise, is there a way we can adapt mask_rw_pte to do the right thing
even in the new scenario?
Suggestions are very welcome.

- Stefano


---


diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 13b6fb1..413e140 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -334,28 +334,12 @@ static __ref void *alloc_low_page(unsigned long *phys)
 	return adr;
 }
 
-static __ref void *map_low_page(void *virt)
-{
-	void *adr;
-	unsigned long phys, left;
-
-	if (after_bootmem)
-		return virt;
-
-	phys = __pa(virt);
-	left = phys & (PAGE_SIZE - 1);
-	adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
-	adr = (void *)(((unsigned long)adr) | left);
-
-	return adr;
-}
-
 static __ref void unmap_low_page(void *adr)
 {
 	if (after_bootmem)
 		return;
 
-	early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
+	early_iounmap(adr, PAGE_SIZE);
 }
 
 static unsigned long __meminit
@@ -403,6 +387,15 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 }
 
 static unsigned long __meminit
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
+		pgprot_t prot)
+{
+	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
+
+	return phys_pte_init(pte, address, end, prot);
+}
+
+static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 	      unsigned long page_size_mask, pgprot_t prot)
 {
@@ -428,10 +421,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 		if (pmd_val(*pmd)) {
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
-				pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
-				last_map_addr = phys_pte_init(pte, address,
+				last_map_addr = phys_pte_update(pmd, address,
 								end, prot);
-				unmap_low_page(pte);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
@@ -478,6 +469,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 }
 
 static unsigned long __meminit
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
+		unsigned long page_size_mask, pgprot_t prot)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+	unsigned long last_map_addr;
+
+	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
+	__flush_tlb_all();
+	return last_map_addr;
+}
+
+static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			 unsigned long page_size_mask)
 {
@@ -502,11 +505,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 
 		if (pud_val(*pud)) {
 			if (!pud_large(*pud)) {
-				pmd = map_low_page(pmd_offset(pud, 0));
-				last_map_addr = phys_pmd_init(pmd, addr, end,
+				last_map_addr = phys_pmd_update(pud, addr, end,
 							 page_size_mask, prot);
-				unmap_low_page(pmd);
-				__flush_tlb_all();
 				continue;
 			}
 			/*
@@ -554,6 +554,17 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 	return last_map_addr;
 }
 
+static unsigned long __meminit
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
+		 unsigned long page_size_mask)
+{
+	pud_t *pud;
+
+	pud = (pud_t *)pgd_page_vaddr(*pgd);
+
+	return phys_pud_init(pud, addr, end, page_size_mask);
+}
+
 unsigned long __meminit
 kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
@@ -577,10 +588,8 @@ kernel_physical_mapping_init(unsigned long start,
 			next = end;
 
 		if (pgd_val(*pgd)) {
-			pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
-			last_map_addr = phys_pud_init(pud, __pa(start),
+			last_map_addr = phys_pud_update(pgd, __pa(start),
 						 __pa(end), page_size_mask);
-			unmap_low_page(pud);
 			continue;
 		}
 


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Yinghai Lu @ 2011-03-01 21:47 UTC
  To: Stefano Stabellini
  Cc: linux-kernel, Konrad Rzeszutek Wilk, Jeremy Fitzhardinge

On 03/01/2011 09:21 AM, Stefano Stabellini wrote:
> [...]
> 
> Can we go back to the previous way of accessing pagetable pages from
> kernel_physical_mapping_init, while keeping the new pagetable allocation
> strategy? It seems to me that the introduction of map_low_page is not
> actually required, is it? In that case we could just revert that bit...
> (appended partial revert example).

We do need map_low_page (BTW, that name is totally misleading...).

The reason is that we put the page table high, and at that point it is
not under max_pfn_mapped (i.e. not mapped yet).

So we have to use
	adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
to map it early before we can read and write it.

Thanks

Yinghai


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-02 15:23 UTC
  To: Yinghai Lu
  Cc: Stefano Stabellini, linux-kernel, Konrad Rzeszutek Wilk,
	Jeremy Fitzhardinge

On Tue, 1 Mar 2011, Yinghai Lu wrote:
> We do need map_low_page (BTW, that name is totally misleading...).
> 
> The reason is that we put the page table high, and at that point it is
> not under max_pfn_mapped (i.e. not mapped yet).
> 
> So we have to use
> 	adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
> to map it early before we can read and write it.
 
I think I have figured out a way to update the logic of mask_rw_pte to
account for the new way of allocating kernel pagetables.
The appended patch fixes the boot crash for me.

---

xen: update mask_rw_pte after kernel page tables init changes

Already existing kernel page table pages can now be mapped using
early_ioremap too so we need to update mask_rw_pte to make sure these
pages are still mapped RO.
We do that by mapping RO all the pages mapped using early_ioremap apart
from the last one that has been allocated because it is not a page table
page yet (it has been hooked into the page tables yet).

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 62192cd..2ff68be 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1440,10 +1440,12 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	/*
 	 * If the new pfn is within the range of the newly allocated
 	 * kernel pagetable, and it isn't being mapped into an
-	 * early_ioremap fixmap slot, make sure it is RO.
+	 * early_ioremap fixmap slot as a freshly allocated page, make sure
+	 * it is RO.
 	 */
-	if (!is_early_ioremap_ptep(ptep) &&
-	    pfn >= pgt_buf_start && pfn < pgt_buf_end)
+	if (((!is_early_ioremap_ptep(ptep) &&
+			pfn >= pgt_buf_start && pfn < pgt_buf_end)) ||
+			(is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
 		pte = pte_wrprotect(pte);
 
 	return pte;
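
The same kind of standalone simulation for the updated predicate (again
not from the thread; pfn values are from the boot logs quoted later in
the thread, the rest is illustrative):

---
#include <stdbool.h>
#include <stdio.h>

/* State just after the first alloc_low_page: pfn 0xcf93d was handed
 * out and pgt_buf_end was bumped to 0xcf93e. */
static unsigned long pgt_buf_start = 0xcf93d, pgt_buf_end = 0xcf93e;

/* Updated predicate: pages seen through early_ioremap are now made RO
 * as well, except the one just handed out by alloc_low_page
 * (pfn == pgt_buf_end - 1), which is not a pagetable page yet. */
static bool new_check_wrprotects(bool early_ioremap_slot, unsigned long pfn)
{
	return (!early_ioremap_slot &&
		pfn >= pgt_buf_start && pfn < pgt_buf_end) ||
	       (early_ioremap_slot && pfn != pgt_buf_end - 1);
}

int main(void)
{
	/* Initial pagetable page re-mapped via map_low_page: now RO. */
	printf("initial pt page via early_ioremap: RO=%d\n",
	       new_check_wrprotects(true, 0x2004));
	/* Freshly allocated page: stays RW until it is hooked in. */
	printf("fresh page via early_ioremap:      RO=%d\n",
	       new_check_wrprotects(true, pgt_buf_end - 1));
	return 0;
}
---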


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Konrad Rzeszutek Wilk @ 2011-03-02 17:06 UTC
  To: Stefano Stabellini; +Cc: Yinghai Lu, linux-kernel, Jeremy Fitzhardinge

On Wed, Mar 02, 2011 at 03:23:48PM +0000, Stefano Stabellini wrote:
> On Tue, 1 Mar 2011, Yinghai Lu wrote:
> > [...]
>  
> I think I have figured out a way to update the logic of mask_rw_pte to
> account for the new way of allocating kernel pagetables.
> The appended patch fixes the boot crash for me.
> 
> ---
> 
> xen: update mask_rw_pte after kernel page tables init changes
> 
> Already existing kernel page table pages can now be mapped using
> early_ioremap too so we need to update mask_rw_pte to make sure these
> pages are still mapped RO.
> We do that by mapping RO all the pages mapped using early_ioremap apart
> from the last one that has been allocated because it is not a page table
> page yet (it has been hooked into the page tables yet).
                  ^- has not?

..this is b/c the initial_kernel_mapping and its family of calls update
pgt_buf_end (used to be called e820_table_end) _before_ calling into
the Xen MMU to set the PTE entries, and then after it is done update
the PMD? Hence the simple check to see if the PFN is the _old_ value of
pgt_buf_end, and if so skip altering the mapping from RW to RO and
leave it be?

On subsequent passes we fall within the first conditional
and update the PTE to RO? When is that pass done?
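
For reference, the ordering in question, sketched from the
alloc_low_page implementation quoted later in this thread (boot-time
path only, checks elided):

---
static __ref void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = pgt_buf_end++;	/* the bump happens first */
	void *adr;

	/* ... after_bootmem and pgt_buf_top checks elided ... */

	/* Mapping the new page installs a fixmap PTE, which on Xen goes
	 * through xen_set_pte_init -> mask_rw_pte; at that point the
	 * freshly allocated page is exactly pfn == pgt_buf_end - 1. */
	adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
	clear_page(adr);
	*phys = pfn * PAGE_SIZE;
	return adr;
}
---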


You also might want to mention the git commits that inspired this patch
and include the nice description you provided in the first email of this
thread.


> 
> [...]


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-07 15:47 UTC
  To: Yinghai Lu
  Cc: Stefano Stabellini, linux-kernel, Konrad Rzeszutek Wilk,
	Jeremy Fitzhardinge

On Tue, 1 Mar 2011, Yinghai Lu wrote:
> On 03/01/2011 09:21 AM, Stefano Stabellini wrote:
> > [...]
> 
> We do need map_low_page (BTW, that name is totally misleading...).
> 
> The reason is that we put the page table high, and at that point it is
> not under max_pfn_mapped (i.e. not mapped yet).
> 
> So we have to use
> 	adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
> to map it early before we can read and write it.

As you might already know from my other email a few days ago, I have
found a solution to this problem by modifying only Xen-specific code.
However, I went back to this because I want to be sure that the patch
is the correct solution rather than a workaround.

It seems to me that only pagetable pages freshly allocated with
alloc_low_page need to be mapped, because initial pagetable pages are
under max_pfn_mapped.
Considering that kernel_physical_mapping_init doesn't go through the
same pagetable page twice, it is never the case that we map_low_page
anything but initial pagetable pages that are under max_pfn_mapped
anyway.
For example this is what happens on my machine:


[    0.000000] initial memory mapped : 0 - 024be000
[    0.000000] init_memory_mapping: 0000000000000000-00000000cffc2000
[    0.000000]  0000000000 - 00cffc2000 page 4k
[    0.000000] kernel direct mapping tables up to cffc2000 @ cf93d000-cffc2000
[    0.000000] DEBUG map low page(2004000, 00001000)
[    0.000000] DEBUG map low page(2008000, 00001000)
[    0.000000] DEBUG map low page(2c62000, 00001000)
[    0.000000] DEBUG map low page(2c65000, 00001000)
[    0.000000] DEBUG map low page(2c66000, 00001000)
[    0.000000] DEBUG map low page(2c67000, 00001000)
[    0.000000] DEBUG map low page(2c68000, 00001000)
[    0.000000] DEBUG map low page(2c69000, 00001000)
[    0.000000] DEBUG map low page(2c6a000, 00001000)
[    0.000000] DEBUG map low page(2c6b000, 00001000)
[    0.000000] DEBUG map low page(2c6c000, 00001000)
[    0.000000] DEBUG map low page(2c6d000, 00001000)
[    0.000000] DEBUG map low page(2c6e000, 00001000)
[    0.000000] DEBUG map low page(2c6f000, 00001000)
[    0.000000] DEBUG map low page(2c70000, 00001000)
[    0.000000] DEBUG map low page(2c71000, 00001000)
[    0.000000] DEBUG map low page(2c72000, 00001000)
[    0.000000] DEBUG map low page(2c73000, 00001000)
[    0.000000] DEBUG map low page(2c74000, 00001000)
[    0.000000] DEBUG map low page(2c75000, 00001000)
[    0.000000] DEBUG map low page(2c76000, 00001000)
[    0.000000] DEBUG map low page(2c77000, 00001000)
[    0.000000] DEBUG map low page(2c78000, 00001000)
[    0.000000] DEBUG map low page(2c79000, 00001000)
[    0.000000] DEBUG map low page(2c7a000, 00001000)
[    0.000000] DEBUG map low page(2c7b000, 00001000)
[    0.000000] DEBUG map low page(239a000, 00001000)
[    0.000000] DEBUG map low page(239b000, 00001000)
[    0.000000] DEBUG map low page(239c000, 00001000)
[    0.000000] DEBUG map low page(239d000, 00001000)
[    0.000000] DEBUG alloc low page(cf93d000, 00001000)
[    0.000000] DEBUG alloc low page(cf93e000, 00001000)
[    0.000000] DEBUG alloc low page(cf93f000, 00001000)
[    0.000000] DEBUG alloc low page(cf940000, 00001000)
[    0.000000] DEBUG alloc low page(cf941000, 00001000)
[    0.000000] DEBUG alloc low page(cf942000, 00001000)
[    0.000000] DEBUG alloc low page(cf943000, 00001000)
[    0.000000] DEBUG alloc low page(cf944000, 00001000)
[    0.000000] DEBUG alloc low page(cf945000, 00001000)
[    0.000000] DEBUG alloc low page(cf946000, 00001000)
[    0.000000] DEBUG alloc low page(cf947000, 00001000)
[    0.000000] DEBUG alloc low page(cf948000, 00001000)
[    0.000000] DEBUG alloc low page(cf949000, 00001000)
[    0.000000] DEBUG alloc low page(cf94a000, 00001000)
[    0.000000] DEBUG alloc low page(cf94b000, 00001000)
[    0.000000] DEBUG alloc low page(cf94c000, 00001000)
[    0.000000] DEBUG alloc low page(cf94d000, 00001000)
[    0.000000] DEBUG alloc low page(cf94e000, 00001000)
[    0.000000] DEBUG alloc low page(cf94f000, 00001000)
[    0.000000] DEBUG alloc low page(cf950000, 00001000)
[    0.000000] DEBUG alloc low page(cf951000, 00001000)
[    0.000000] DEBUG alloc low page(cf952000, 00001000)
[    0.000000] DEBUG alloc low page(cf953000, 00001000)
[    0.000000] DEBUG alloc low page(cf954000, 00001000)
[    0.000000] DEBUG alloc low page(cf955000, 00001000)
[    0.000000] DEBUG alloc low page(cf956000, 00001000)
[    0.000000] DEBUG alloc low page(cf957000, 00001000)
[    0.000000] DEBUG alloc low page(cf958000, 00001000)
[    0.000000] DEBUG alloc low page(cf959000, 00001000)
[    0.000000] DEBUG alloc low page(cf95a000, 00001000)
[    0.000000] DEBUG alloc low page(cf95b000, 00001000)
[    0.000000] DEBUG alloc low page(cf95c000, 00001000)
[    0.000000] DEBUG alloc low page(cf95d000, 00001000)
[    0.000000] DEBUG alloc low page(cf95e000, 00001000)
[    0.000000] DEBUG alloc low page(cf95f000, 00001000)
[    0.000000] DEBUG alloc low page(cf960000, 00001000)
[    0.000000] DEBUG alloc low page(cf961000, 00001000)
[    0.000000] DEBUG alloc low page(cf962000, 00001000)
[    0.000000] DEBUG alloc low page(cf963000, 00001000)
[    0.000000] DEBUG alloc low page(cf964000, 00001000)
[    0.000000] DEBUG alloc low page(cf965000, 00001000)


If this is the case, the introduction of map_low_page is unnecessary
and actually makes the kernel map pages that are already mapped anyway.
Am I correct?


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-07 15:55 UTC
  To: Konrad Rzeszutek Wilk
  Cc: Stefano Stabellini, Yinghai Lu, linux-kernel, Jeremy Fitzhardinge

On Wed, 2 Mar 2011, Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 02, 2011 at 03:23:48PM +0000, Stefano Stabellini wrote:
> > On Tue, 1 Mar 2011, Yinghai Lu wrote:
> > > [...]
> >  
> > I think I have figured out a way to update the logic of mask_rw_pte to
> > account for the new way of allocating kernel pagetables.
> > The appended patch fixes the boot crash for me.
> > 
> > ---
> > 
> > xen: update mask_rw_pte after kernel page tables init changes
> > 
> > Already existing kernel page table pages can now be mapped using
> > early_ioremap too so we need to update mask_rw_pte to make sure these
> > pages are still mapped RO.
> > We do that by mapping RO all the pages mapped using early_ioremap apart
> > from the last one that has been allocated because it is not a page table
> > page yet (it has been hooked into the page tables yet).
>                   ^- has not?
> 
> ..this is b/c the initial_kernel_mapping and its family of calls update
> pgt_buf_end (used to be called e820_table_end) _before_ calling into
> the Xen MMU to set the PTE entries, and then after it is done update
> the PMD? Hence the simple check to see if the PFN is the _old_ value of
> pgt_buf_end, and if so skip altering the mapping from RW to RO and
> leave it be?

yes and yes


> On subsequent passes we fall within the first conditional
> and update the PTE to RO?
> When is that pass done?

kernel_physical_mapping_init doesn't go through the same pagetable
pages twice so that is not the issue.
The problem is that kernel_physical_mapping_init is now re-mapping all
the initial pagetable pages using early_ioremap, so the usage of
early_ioremap is not enough anymore to distinguish between freshly
allocated page table pages and already hooked pagetable pages.
I am investigating the possibility that the introduction of map_low_page
is actually overkill for this reason.


> You also might want to mention the git commits that inspired this patch
> and include the nice description you provided in the first email of this
> thread.

sure


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Yinghai Lu @ 2011-03-07 19:17 UTC
  To: Stefano Stabellini
  Cc: linux-kernel, Konrad Rzeszutek Wilk, Jeremy Fitzhardinge

On Mon, Mar 7, 2011 at 7:47 AM, Stefano Stabellini
<stefano.stabellini@eu.citrix.com> wrote:
> On Tue, 1 Mar 2011, Yinghai Lu wrote:
>> [...]
>
> As you might already know from my other email a few days ago, I have
> found a solution to this problem by modifying only Xen-specific code.
> However, I went back to this because I want to be sure that the patch
> is the correct solution rather than a workaround.
>
> It seems to me that only pagetable pages freshly allocated with
> alloc_low_page need to be mapped, because initial pagetable pages are
> under max_pfn_mapped.
> Considering that kernel_physical_mapping_init doesn't go through the
> same pagetable page twice, it is never the case that we map_low_page
> anything but initial pagetable pages that are under max_pfn_mapped
> anyway.
> For example this is what happens on my machine:
>
>
> [    0.000000] initial memory mapped : 0 - 024be000
> [    0.000000] init_memory_mapping: 0000000000000000-00000000cffc2000
> [    0.000000]  0000000000 - 00cffc2000 page 4k
> [    0.000000] kernel direct mapping tables up to cffc2000 @ cf93d000-cffc2000
> [    0.000000] DEBUG map low page(2004000, 00001000)
> [    0.000000] DEBUG map low page(2008000, 00001000)
> [...]

These are already mapped.

> [    0.000000] DEBUG alloc low page(cf93d000, 00001000)
> [    0.000000] DEBUG alloc low page(cf93e000, 00001000)
> [...]

These are not mapped yet, and we need to use early_ioremap.

>
>
> If this is the case, the introduction of map_low_page is unnecessary
> and actually makes the kernel map pages that are already mapped anyway.
> Am I correct?

The initial mapping only goes up to 512M, and new pages for page tables could be above that.
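
A concrete instance of that constraint as a standalone check (the
0x7f74c000 figure is from the log quoted later in this thread):

---
#include <stdio.h>

int main(void)
{
	unsigned long initial_top  = 512UL << 20;	/* 512M initial mapping */
	unsigned long pgt_buf_phys = 0x7f74c000UL;	/* pre-allocated buffer */

	/* Above the initial mapping there is no direct mapping yet, so
	 * a plain __va() dereference would fault; such pages have to be
	 * reached through early_memremap instead. */
	printf("pgt_buf at %#lx is %s the initial mapping\n", pgt_buf_phys,
	       pgt_buf_phys >= initial_top ? "above" : "within");
	return 0;
}
---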

Yinghai


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-08 12:14 UTC
  To: Yinghai Lu
  Cc: Stefano Stabellini, linux-kernel, Konrad Rzeszutek Wilk,
	Jeremy Fitzhardinge


On Mon, 7 Mar 2011, Yinghai Lu wrote:
> On Mon, Mar 7, 2011 at 7:47 AM, Stefano Stabellini
> <stefano.stabellini@eu.citrix.com> wrote:
> > On Tue, 1 Mar 2011, Yinghai Lu wrote:
> >> [...]
> >
> > As you might already know from my other email a few days ago, I have
> > found a solution to this problem by modifying only Xen-specific code.
> > However, I went back to this because I want to be sure that the patch
> > is the correct solution rather than a workaround.
> >
> > It seems to me that only pagetable pages freshly allocated with
> > alloc_low_page need to be mapped, because initial pagetable pages are
> > under max_pfn_mapped.
> > Considering that kernel_physical_mapping_init doesn't go through the
> > same pagetable page twice, it is never the case that we map_low_page
> > anything but initial pagetable pages that are under max_pfn_mapped
> > anyway.
> > For example this is what happens on my machine:
> >
> >
> > [    0.000000] initial memory mapped : 0 - 024be000
> > [    0.000000] init_memory_mapping: 0000000000000000-00000000cffc2000
> > [    0.000000]  0000000000 - 00cffc2000 page 4k
> > [    0.000000] kernel direct mapping tables up to cffc2000 @ cf93d000-cffc2000
> > [    0.000000] DEBUG map low page(2004000, 00001000)
> > [    0.000000] DEBUG map low page(2008000, 00001000)
> > [...]
> 
> These are already mapped.

Right. So why are we mapping them again using map_low_page?
Maybe we can just access them directly like we did before?
In my tests going back to using phys_pud_update does not cause any
regressions.


> > [    0.000000] DEBUG alloc low page(cf93d000, 00001000)
> > [    0.000000] DEBUG alloc low page(cf93e000, 00001000)
> > [...]
> 
> These are not mapped yet, and we need to use early_ioremap.

In fact we are mapping them inside alloc_low_page. No need for
map_low_page, right?


> > If this is the case, the introduction of map_low_page is unnecessary
> > and actually makes the kernel map pages that are already mapped anyway.
> > Am I correct?
> 
> The initial mapping only goes up to 512M, and new pages for page tables could be above that.

Sure, but the new pages are mapped from within alloc_low_page, so it
looks like the call to map_low_page is not useful and we could just go
back to accessing already existing pagetable pages directly.
For example:


@@ -554,6 +554,17 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 	return last_map_addr;
 }
 
+static unsigned long __meminit
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
+		 unsigned long page_size_mask)
+{
+	pud_t *pud;
+
+	pud = (pud_t *)pgd_page_vaddr(*pgd);
+
+	return phys_pud_init(pud, addr, end, page_size_mask);
+}
+
 unsigned long __meminit
 kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
@@ -577,10 +588,8 @@ kernel_physical_mapping_init(unsigned long start,
 			next = end;
 
 		if (pgd_val(*pgd)) {
-			pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
-			last_map_addr = phys_pud_init(pud, __pa(start),
+			last_map_addr = phys_pud_update(pgd, __pa(start),
 						 __pa(end), page_size_mask);
-			unmap_low_page(pud);
 			continue;
 		}


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Yinghai Lu @ 2011-03-08 16:39 UTC
  To: Stefano Stabellini
  Cc: linux-kernel, Konrad Rzeszutek Wilk, Jeremy Fitzhardinge

On Tue, Mar 8, 2011 at 4:14 AM, Stefano Stabellini
<stefano.stabellini@eu.citrix.com> wrote:
>> > [    0.000000] DEBUG alloc low page(cf965000, 00001000)
>>
>> These are not mapped yet, and we need to use early_ioremap.
>
> In fact we are mapping them inside alloc_low_page. No need for
> map_low_page, right?

alloc_low_page returns a virtual address, but it cannot be used to
access that location right away.
So in that case map_low_page is used to convert that virtual address
to a physical address and then to an early-mapped address.

Yinghai


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-08 17:40 UTC
  To: Yinghai Lu
  Cc: Stefano Stabellini, linux-kernel, Konrad Rzeszutek Wilk,
	Jeremy Fitzhardinge


On Tue, 8 Mar 2011, Yinghai Lu wrote:
> On Tue, Mar 8, 2011 at 4:14 AM, Stefano Stabellini
> <stefano.stabellini@eu.citrix.com> wrote:
> >> > [    0.000000] DEBUG alloc low page(cf965000, 00001000)
> >>
> >> These are not mapped yet, and we need to use early_ioremap.
> >
> > In fact we are mapping them inside alloc_low_page. No need for
> > map_low_page, right?
> 
> alloc_low_page returns a virtual address, but it cannot be used to
> access that location right away.
> So in that case map_low_page is used to convert that virtual address
> to a physical address and then to an early-mapped address.
 
Are you sure?
I am looking at this piece of code under kernel_physical_mapping_init:


---
	pud = alloc_low_page(&pud_phys);
	last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
				      page_size_mask);
	unmap_low_page(pud);
---

and the implementation of alloc_low_page is:

---
static __ref void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = pgt_buf_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
		*phys = __pa(adr);

		return adr;
	}

	if (pfn >= pgt_buf_top)
		panic("alloc_low_page: ran out of memory");

	adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
	clear_page(adr);
	*phys  = pfn * PAGE_SIZE;
	return adr;
}
---

It looks to me that map_low_page is never called in this code path.
If I read the code correctly, map_low_page is only used to map pagetable
pages that are already mapped (see the debug logging I posted in the
previous email), so it is not useful.


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Yinghai Lu @ 2011-03-08 18:54 UTC
  To: Stefano Stabellini
  Cc: linux-kernel, Konrad Rzeszutek Wilk, Jeremy Fitzhardinge

On Tue, Mar 8, 2011 at 9:40 AM, Stefano Stabellini
<stefano.stabellini@eu.citrix.com> wrote:
> On Tue, 8 Mar 2011, Yinghai Lu wrote:
>> On Tue, Mar 8, 2011 at 4:14 AM, Stefano Stabellini
>> <stefano.stabellini@eu.citrix.com> wrote:
>> >> > [    0.000000] DEBUG alloc low page(cf965000, 00001000)
>> >>
>> >> These are not mapped yet, and we need to use early_ioremap.
>> >
>> > In fact we are mapping them inside alloc_low_page. No need for
>> > map_low_page, right?
>>
>> alloc_low_page returns a virtual address, but it cannot be used to
>> access that location right away.
>> So in that case map_low_page is used to convert that virtual address
>> to a physical address and then to an early-mapped address.
>
> Are you sure?
> I am looking at this piece of code under kernel_physical_mapping_init:
>
>
> [...]
>
> It looks to me that map_low_page is never called in this code path.
> If I read the code correctly, map_low_page is only used to map pagetable
> pages that are already mapped (see the debug logging I posted in the
> previous email), so it is not useful.


[    0.000000] kernel direct mapping tables up to 7f750000 @
[0x7f74c000-0x7f74ffff] pre-allocated
[    0.000000] DEBUG map_low_page phys    243a000
[    0.000000] DEBUG map_low_page phys    243e000
[    0.000000] DEBUG alloc_low_page phys   7f74c000
[    0.000000] DEBUG map_low_page phys    243a000
[    0.000000] DEBUG map_low_page phys   7f74c000
[    0.000000] DEBUG alloc_low_page phys   7f74d000


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Stefano Stabellini @ 2011-03-08 20:09 UTC
  To: Yinghai Lu
  Cc: Stefano Stabellini, linux-kernel, Konrad Rzeszutek Wilk,
	Jeremy Fitzhardinge

On Tue, 8 Mar 2011, Yinghai Lu wrote:
> [    0.000000] kernel direct mapping tables up to 7f750000 @
> [0x7f74c000-0x7f74ffff] pre-allocated
> [    0.000000] DEBUG map_low_page phys    243a000
> [    0.000000] DEBUG map_low_page phys    243e000
> [    0.000000] DEBUG alloc_low_page phys   7f74c000
> [    0.000000] DEBUG map_low_page phys    243a000
> [    0.000000] DEBUG map_low_page phys   7f74c000
> [    0.000000] DEBUG alloc_low_page phys   7f74d000
> 

I couldn't see this on my machine, but I was afraid it could happen on
some hardware :(
Thank you very much for spending your time on this and for your kind
replies.
I'll follow up with an update of the Xen-specific patch I posted before,
and I won't bother you again with this :)
Cheers,

Stefano


* Re: "x86-64, mm: Put early page table high" causes crash on Xen
From: Yinghai Lu @ 2011-03-08 20:21 UTC
  To: Stefano Stabellini
  Cc: linux-kernel, Konrad Rzeszutek Wilk, Jeremy Fitzhardinge

On Tue, Mar 8, 2011 at 12:09 PM, Stefano Stabellini
<stefano.stabellini@eu.citrix.com> wrote:
> On Tue, 8 Mar 2011, Yinghai Lu wrote:
>> [    0.000000] kernel direct mapping tables up to 7f750000 @
>> [0x7f74c000-0x7f74ffff] pre-allocated
>> [    0.000000] DEBUG map_low_page phys    243a000
>> [    0.000000] DEBUG map_low_page phys    243e000
>> [    0.000000] DEBUG alloc_low_page phys   7f74c000
>> [    0.000000] DEBUG map_low_page phys    243a000
>> [    0.000000] DEBUG map_low_page phys   7f74c000
>> [    0.000000] DEBUG alloc_low_page phys   7f74d000
>>
>
> I couldn't see this on my machine but I was afraid it could happen on
> some hardware :(

If the system has more than 4G of memory.

> Thank you very much for spending your time on this and for your kind
> replies.
> I'll follow up with an update of the xen specific patch I posted before
> and I won't bother you again with this :)

never mind.

Thanks

Yinghai

