* [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
@ 2012-06-05  7:11 Minchan Kim
  2012-06-08 13:58 ` Kim, Jong-Sung
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Minchan Kim @ 2012-06-05  7:11 UTC (permalink / raw)
  To: Russell King
  Cc: Nicolas Pitre, Catalin Marinas, linux-arm-kernel, linux-kernel,
	Jongsung Kim, Chanho Min, linux-mm, Minchan Kim

If arm_memblock_steal() is used to steal a page that is not aligned to the
section size, a panic can happen during boot due to a page fault in
map_lowmem.

Detail:

1) mdesc->reserve can steal a page allocated at 0x1ffff000 by memblock,
   which prefers the tail pages of regions.
2) map_lowmem maps 0x00000000 - 0x1fe00000.
3) map_lowmem then tries to map 0x1fe00000, but the remaining range is no
   longer section-aligned because of 1).
4) alloc_init_pte is called and allocates a new page for the new pte via
   memblock_alloc.
5) The memory allocated for the pte is at 0x1fffe000, which is not mapped yet.
6) memset(ptr, 0, sz) in early_alloc_aligned panics!

This patch fixes it by limiting memblock allocations to the already-mapped
memory range.
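
For context, the allocation path that faults in step 6 looks roughly like
this (a simplified sketch of early_alloc_aligned() as it stood around this
time, written from memory rather than copied verbatim):

/* Simplified sketch of the early allocator hit in step 6.  memblock_alloc()
 * hands back a physical address near the top of the region; __va() turns it
 * into a lowmem virtual address, but if map_lowmem() has not mapped that
 * section yet, the memset() below faults and the boot panics. */
static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
{
	void *ptr = __va(memblock_alloc(sz, align));

	memset(ptr, 0, sz);	/* step 6: faults if ptr is not mapped yet */
	return ptr;
}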

Reported-by: Jongsung Kim <neidhard.kim@lge.com>
Suggested-by: Chanho Min <chanho.min@lge.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 arch/arm/mm/mmu.c |   37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60..a15aafe 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -594,7 +594,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 
 static void __init alloc_init_section(pud_t *pud, unsigned long addr,
 				      unsigned long end, phys_addr_t phys,
-				      const struct mem_type *type)
+				      const struct mem_type *type, bool lowmem)
 {
 	pmd_t *pmd = pmd_offset(pud, addr);
 
@@ -619,6 +619,8 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
 
 		flush_pmd_entry(p);
 	} else {
+		if (lowmem)
+			memblock_set_current_limit(__pa(addr));
 		/*
 		 * No need to loop; pte's aren't interested in the
 		 * individual L1 entries.
@@ -628,14 +630,15 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
 }
 
 static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
-	unsigned long end, unsigned long phys, const struct mem_type *type)
+				unsigned long end, unsigned long phys,
+				const struct mem_type *type, bool lowmem)
 {
 	pud_t *pud = pud_offset(pgd, addr);
 	unsigned long next;
 
 	do {
 		next = pud_addr_end(addr, end);
-		alloc_init_section(pud, addr, next, phys, type);
+		alloc_init_section(pud, addr, next, phys, type, lowmem);
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
 }
@@ -702,14 +705,7 @@ static void __init create_36bit_mapping(struct map_desc *md,
 }
 #endif	/* !CONFIG_ARM_LPAE */
 
-/*
- * Create the page directory entries and any necessary
- * page tables for the mapping specified by `md'.  We
- * are able to cope here with varying sizes and address
- * offsets, and we take full advantage of sections and
- * supersections.
- */
-static void __init create_mapping(struct map_desc *md)
+static inline void __create_mapping(struct map_desc *md, bool lowmem)
 {
 	unsigned long addr, length, end;
 	phys_addr_t phys;
@@ -759,7 +755,7 @@ static void __init create_mapping(struct map_desc *md)
 	do {
 		unsigned long next = pgd_addr_end(addr, end);
 
-		alloc_init_pud(pgd, addr, next, phys, type);
+		alloc_init_pud(pgd, addr, next, phys, type, lowmem);
 
 		phys += next - addr;
 		addr = next;
@@ -767,6 +763,18 @@ static void __init create_mapping(struct map_desc *md)
 }
 
 /*
+ * Create the page directory entries and any necessary
+ * page tables for the mapping specified by `md'.  We
+ * are able to cope here with varying sizes and address
+ * offsets, and we take full advantage of sections and
+ * supersections.
+ */
+static void __init create_mapping(struct map_desc *md)
+{
+	__create_mapping(md, false);
+}
+
+/*
  * Create the architecture specific mappings
  */
 void __init iotable_init(struct map_desc *io_desc, int nr)
@@ -1111,7 +1119,7 @@ static void __init map_lowmem(void)
 		map.length = end - start;
 		map.type = MT_MEMORY;
 
-		create_mapping(&map);
+		__create_mapping(&map, true);
 	}
 }
 
@@ -1123,11 +1131,10 @@ void __init paging_init(struct machine_desc *mdesc)
 {
 	void *zero_page;
 
-	memblock_set_current_limit(arm_lowmem_limit);
-
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
+	memblock_set_current_limit(arm_lowmem_limit);
 	dma_contiguous_remap();
 	devicemaps_init(mdesc);
 	kmap_init();
-- 
1.7.9.5



* RE: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-05  7:11 [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc Minchan Kim
@ 2012-06-08 13:58 ` Kim, Jong-Sung
  2012-06-27 16:02   ` Dave Martin
  2012-06-27 19:18   ` Russell King - ARM Linux
  2012-06-19  8:38 ` Minchan Kim
  2012-06-27 16:12 ` Dave Martin
  2 siblings, 2 replies; 14+ messages in thread
From: Kim, Jong-Sung @ 2012-06-08 13:58 UTC (permalink / raw)
  To: 'Minchan Kim', 'Russell King'
  Cc: 'Nicolas Pitre', 'Catalin Marinas',
	linux-arm-kernel, linux-kernel, 'Chanho Min',
	linux-mm

> From: Minchan Kim [mailto:minchan@kernel.org]
> Sent: Tuesday, June 05, 2012 4:12 PM
> 
> If we do arm_memblock_steal with a page which is not aligned with section
> size, panic can happen during boot by page fault in map_lowmem.
> 
> Detail:
> 
> 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
>    which prefers tail pages of regions.
> 2) map_lowmem maps 0x00000000 - 0x1fe00000
> 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
> 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
> 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
> 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!

May I suggest another simple approach? The first contiguous run of sections
is always safely section-mapped inside the alloc_init_section function.
So, by limiting memblock_alloc to the end of that first contiguous run of
sections at the start of map_lowmem, map_lowmem can safely memblock_alloc and
memset even if we have one or more section-unaligned memory regions. The
limit can be extended back to arm_lowmem_limit after map_lowmem is done.

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60..edf1e2d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1094,6 +1094,11 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
 	struct memblock_region *reg;
+	phys_addr_t pmd_map_end;
+
+	pmd_map_end = (memblock.memory.regions[0].base +
+	               memblock.memory.regions[0].size) & PMD_MASK;
+	memblock_set_current_limit(pmd_map_end);
 
 	/* Map all the lowmem memory banks. */
 	for_each_memblock(memory, reg) {
@@ -1113,6 +1118,8 @@ static void __init map_lowmem(void)
 
 		create_mapping(&map);
 	}
+
+	memblock_set_current_limit(arm_lowmem_limit);
 }
 
 /*
@@ -1123,8 +1130,6 @@ void __init paging_init(struct machine_desc *mdesc)
 {
 	void *zero_page;
 
-	memblock_set_current_limit(arm_lowmem_limit);
-
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();




* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-05  7:11 [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc Minchan Kim
  2012-06-08 13:58 ` Kim, Jong-Sung
@ 2012-06-19  8:38 ` Minchan Kim
  2012-06-27 16:12 ` Dave Martin
  2 siblings, 0 replies; 14+ messages in thread
From: Minchan Kim @ 2012-06-19  8:38 UTC (permalink / raw)
  To: Russell King
  Cc: Nicolas Pitre, Catalin Marinas, linux-arm-kernel, linux-kernel,
	Jongsung Kim, Chanho Min, linux-mm, Minchan Kim

Resend.

Could you please take a look at this problem?

Thanks.

On Tue, Jun 5, 2012 at 4:11 PM, Minchan Kim <minchan@kernel.org> wrote:
> If we do arm_memblock_steal with a page which is not aligned with section size,
> panic can happen during boot by page fault in map_lowmem.
>
> Detail:
>
> 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
>   which prefers tail pages of regions.
> 2) map_lowmem maps 0x00000000 - 0x1fe00000
> 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
> 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
> 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
> 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!
>
> This patch fix it by limiting memblock to mapped memory range.
>
> Reported-by: Jongsung Kim <neidhard.kim@lge.com>
> Suggested-by: Chanho Min <chanho.min@lge.com>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
>  arch/arm/mm/mmu.c |   37 ++++++++++++++++++++++---------------
>  1 file changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index e5dad60..a15aafe 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -594,7 +594,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>
>  static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>                                      unsigned long end, phys_addr_t phys,
> -                                     const struct mem_type *type)
> +                                     const struct mem_type *type, bool lowmem)
>  {
>        pmd_t *pmd = pmd_offset(pud, addr);
>
> @@ -619,6 +619,8 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>
>                flush_pmd_entry(p);
>        } else {
> +               if (lowmem)
> +                       memblock_set_current_limit(__pa(addr));
>                /*
>                 * No need to loop; pte's aren't interested in the
>                 * individual L1 entries.
> @@ -628,14 +630,15 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>  }
>
>  static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
> -       unsigned long end, unsigned long phys, const struct mem_type *type)
> +                               unsigned long end, unsigned long phys,
> +                               const struct mem_type *type, bool lowmem)
>  {
>        pud_t *pud = pud_offset(pgd, addr);
>        unsigned long next;
>
>        do {
>                next = pud_addr_end(addr, end);
> -               alloc_init_section(pud, addr, next, phys, type);
> +               alloc_init_section(pud, addr, next, phys, type, lowmem);
>                phys += next - addr;
>        } while (pud++, addr = next, addr != end);
>  }
> @@ -702,14 +705,7 @@ static void __init create_36bit_mapping(struct map_desc *md,
>  }
>  #endif /* !CONFIG_ARM_LPAE */
>
> -/*
> - * Create the page directory entries and any necessary
> - * page tables for the mapping specified by `md'.  We
> - * are able to cope here with varying sizes and address
> - * offsets, and we take full advantage of sections and
> - * supersections.
> - */
> -static void __init create_mapping(struct map_desc *md)
> +static inline void __create_mapping(struct map_desc *md, bool lowmem)
>  {
>        unsigned long addr, length, end;
>        phys_addr_t phys;
> @@ -759,7 +755,7 @@ static void __init create_mapping(struct map_desc *md)
>        do {
>                unsigned long next = pgd_addr_end(addr, end);
>
> -               alloc_init_pud(pgd, addr, next, phys, type);
> +               alloc_init_pud(pgd, addr, next, phys, type, lowmem);
>
>                phys += next - addr;
>                addr = next;
> @@ -767,6 +763,18 @@ static void __init create_mapping(struct map_desc *md)
>  }
>
>  /*
> + * Create the page directory entries and any necessary
> + * page tables for the mapping specified by `md'.  We
> + * are able to cope here with varying sizes and address
> + * offsets, and we take full advantage of sections and
> + * supersections.
> + */
> +static void __init create_mapping(struct map_desc *md)
> +{
> +       __create_mapping(md, false);
> +}
> +
> +/*
>  * Create the architecture specific mappings
>  */
>  void __init iotable_init(struct map_desc *io_desc, int nr)
> @@ -1111,7 +1119,7 @@ static void __init map_lowmem(void)
>                map.length = end - start;
>                map.type = MT_MEMORY;
>
> -               create_mapping(&map);
> +               __create_mapping(&map, true);
>        }
>  }
>
> @@ -1123,11 +1131,10 @@ void __init paging_init(struct machine_desc *mdesc)
>  {
>        void *zero_page;
>
> -       memblock_set_current_limit(arm_lowmem_limit);
> -
>        build_mem_type_table();
>        prepare_page_table();
>        map_lowmem();
> +       memblock_set_current_limit(arm_lowmem_limit);
>        dma_contiguous_remap();
>        devicemaps_init(mdesc);
>        kmap_init();
> --
> 1.7.9.5
>



-- 
Kind regards,
Minchan Kim


* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-08 13:58 ` Kim, Jong-Sung
@ 2012-06-27 16:02   ` Dave Martin
  2012-06-28  5:43     ` Kim, Jong-Sung
  2012-06-27 19:18   ` Russell King - ARM Linux
  1 sibling, 1 reply; 14+ messages in thread
From: Dave Martin @ 2012-06-27 16:02 UTC (permalink / raw)
  To: Kim, Jong-Sung
  Cc: 'Minchan Kim', 'Russell King',
	'Nicolas Pitre', 'Catalin Marinas',
	'Chanho Min',
	linux-kernel, linux-mm, linux-arm-kernel

On Fri, Jun 08, 2012 at 10:58:50PM +0900, Kim, Jong-Sung wrote:
> > From: Minchan Kim [mailto:minchan@kernel.org]
> > Sent: Tuesday, June 05, 2012 4:12 PM
> > 
> > If we do arm_memblock_steal with a page which is not aligned with section
> > size, panic can happen during boot by page fault in map_lowmem.
> > 
> > Detail:
> > 
> > 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
> >    which prefers tail pages of regions.
> > 2) map_lowmem maps 0x00000000 - 0x1fe00000
> > 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
> > 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
> > 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
> > 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!
> 
> May I suggest another simple approach? The first continuous couples of
> sections are always safely section-mapped inside alloc_init_section funtion.
> So, by limiting memblock_alloc to the end of the first continuous couples of
> sections at the start of map_lowmem, map_lowmem can safely memblock_alloc &
> memset even if we have one or more section-unaligned memory regions. The
> limit can be extended back to arm_lowmem_limit after the map_lowmem is done.

By a strange coincidence, I hit exactly the same problem today.

This approach looks nice and simple, but ...

> 
> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index e5dad60..edf1e2d 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -1094,6 +1094,11 @@ static void __init kmap_init(void)
>  static void __init map_lowmem(void)
>  {
>  	struct memblock_region *reg;
> +	phys_addr_t pmd_map_end;
> +
> +	pmd_map_end = (memblock.memory.regions[0].base +
> +	               memblock.memory.regions[0].size) & PMD_MASK;

What does memblock.memory.regions[0] actually refer to at this point?
Just before map_lowmem(), memblock_dump_all() gives me this:

[    0.000000] MEMBLOCK configuration:
[    0.000000]  memory size = 0x1ff00000 reserved size = 0x6220a5
[    0.000000]  memory.cnt  = 0x1
[    0.000000]  memory[0x0]     [0x00000080000000-0x0000009fefffff], 0x1ff00000 bytes
[    0.000000]  reserved.cnt  = 0x4
[    0.000000]  reserved[0x0]   [0x00000080004000-0x00000080007fff], 0x4000 bytes
[    0.000000]  reserved[0x1]   [0x00000080008200-0x00000080582c83], 0x57aa84 bytes
[    0.000000]  reserved[0x2]   [0x000000807d4e78-0x000000807d6603], 0x178c bytes
[    0.000000]  reserved[0x3]   [0x00000080d00000-0x00000080da1e94], 0xa1e95 bytes

For me, it appears that this block just contains the initial region
passed in ATAG_MEM or on the command line, with some reservations
for swapper_pg_dir, the kernel text/data, device tree and initramfs.

So far as I can tell, the only memory guaranteed to be mapped here
is the kernel image: there may be no guarantee that there is any unused
space in this region which could be used to allocate extra page tables.
The rest appears during the execution of map_lowmem().
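
(In case someone wants to reproduce the listing above: it comes from
memblock_dump_all().  A throwaway debug hook along the lines of the sketch
below -- or simply booting with "memblock=debug" on the command line --
prints the same information; the hook name and its suggested call site are
purely illustrative.)

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/memblock.h>

/* Throwaway debug aid (sketch): dump memblock regions and reservations.
 * Calling this from paging_init(), just before map_lowmem(), produces the
 * dump quoted above. */
static void __init debug_dump_memblock(void)
{
	pr_info("memblock state before map_lowmem():\n");
	memblock_dump_all();
}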

Cheers
---Dave

> +	memblock_set_current_limit(pmd_map_end);
>  
>  	/* Map all the lowmem memory banks. */
>  	for_each_memblock(memory, reg) {
> @@ -1113,6 +1118,8 @@ static void __init map_lowmem(void)
>  
>  		create_mapping(&map);
>  	}
> +
> +	memblock_set_current_limit(arm_lowmem_limit);
>  }
>  
>  /*
> @@ -1123,8 +1130,6 @@ void __init paging_init(struct machine_desc *mdesc)
>  {
>  	void *zero_page;
>  
> -	memblock_set_current_limit(arm_lowmem_limit);
> -
>  	build_mem_type_table();
>  	prepare_page_table();
>  	map_lowmem();
> 
> 
> 


* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-05  7:11 [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc Minchan Kim
  2012-06-08 13:58 ` Kim, Jong-Sung
  2012-06-19  8:38 ` Minchan Kim
@ 2012-06-27 16:12 ` Dave Martin
  2012-06-28  4:33   ` Nicolas Pitre
  2 siblings, 1 reply; 14+ messages in thread
From: Dave Martin @ 2012-06-27 16:12 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Russell King, Nicolas Pitre, Catalin Marinas, Chanho Min,
	linux-kernel, linux-mm, Jongsung Kim, linux-arm-kernel

On Tue, Jun 05, 2012 at 04:11:52PM +0900, Minchan Kim wrote:
> If we do arm_memblock_steal with a page which is not aligned with section size,
> panic can happen during boot by page fault in map_lowmem.
> 
> Detail:
> 
> 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
>    which prefers tail pages of regions.
> 2) map_lowmem maps 0x00000000 - 0x1fe00000
> 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
> 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
> 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
> 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!
> 
> This patch fix it by limiting memblock to mapped memory range.
> 
> Reported-by: Jongsung Kim <neidhard.kim@lge.com>
> Suggested-by: Chanho Min <chanho.min@lge.com>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
>  arch/arm/mm/mmu.c |   37 ++++++++++++++++++++++---------------
>  1 file changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index e5dad60..a15aafe 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -594,7 +594,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>  
>  static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>  				      unsigned long end, phys_addr_t phys,
> -				      const struct mem_type *type)
> +				      const struct mem_type *type, bool lowmem)
>  {
>  	pmd_t *pmd = pmd_offset(pud, addr);
>  
> @@ -619,6 +619,8 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>  
>  		flush_pmd_entry(p);
>  	} else {
> +		if (lowmem)
> +			memblock_set_current_limit(__pa(addr));

I thought of doing something similar to this.  A concern I have is that
when mapping the first few sections, is it guaranteed that there will be
enough memory available below memblock.current_limit to allocate extra
page tables, in general?  I think we could get failures here even though
there is spare (unmapped) memory above the limit.

An alternative approach would be to install temporary section mappings
to cover all of lowmem before starting to create the real mappings.
The memblock allocator still knows which regions are reserved, so we
shouldn't end up scribbling on pages which are really not supposed to
be mapped in lowmem.

That feels like it should work, but would involve extra overheads, more
flushing etc. to purge all those temporary section mappings from the TLB.
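
To make that concrete, a very rough sketch of such a provisional pass could
look like the following (this is not a posted patch: the helper name is made
up, the section attributes are simplified, and a real version would also need
the TLB maintenance mentioned above):

/* Provisional pass (sketch only): cover all of lowmem with flat section
 * entries before map_lowmem() builds the real mappings, so that any page
 * table memory memblock hands out is already accessible.  The entries are
 * overwritten later by the real mappings. */
static void __init map_lowmem_provisional(void)
{
	unsigned long addr;

	for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory;
	     addr += SECTION_SIZE) {
		pmd_t *pmd = pmd_off_k(addr);

		if (addr & SECTION_SIZE)
			pmd++;
		*pmd = __pmd(__pa(addr) | PMD_TYPE_SECT | PMD_SECT_AP_WRITE);
		flush_pmd_entry(pmd);
	}
}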

Cheers
---Dave

>  		/*
>  		 * No need to loop; pte's aren't interested in the
>  		 * individual L1 entries.
> @@ -628,14 +630,15 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>  }
>  
>  static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
> -	unsigned long end, unsigned long phys, const struct mem_type *type)
> +				unsigned long end, unsigned long phys,
> +				const struct mem_type *type, bool lowmem)
>  {
>  	pud_t *pud = pud_offset(pgd, addr);
>  	unsigned long next;
>  
>  	do {
>  		next = pud_addr_end(addr, end);
> -		alloc_init_section(pud, addr, next, phys, type);
> +		alloc_init_section(pud, addr, next, phys, type, lowmem);
>  		phys += next - addr;
>  	} while (pud++, addr = next, addr != end);
>  }
> @@ -702,14 +705,7 @@ static void __init create_36bit_mapping(struct map_desc *md,
>  }
>  #endif	/* !CONFIG_ARM_LPAE */
>  
> -/*
> - * Create the page directory entries and any necessary
> - * page tables for the mapping specified by `md'.  We
> - * are able to cope here with varying sizes and address
> - * offsets, and we take full advantage of sections and
> - * supersections.
> - */
> -static void __init create_mapping(struct map_desc *md)
> +static inline void __create_mapping(struct map_desc *md, bool lowmem)
>  {
>  	unsigned long addr, length, end;
>  	phys_addr_t phys;
> @@ -759,7 +755,7 @@ static void __init create_mapping(struct map_desc *md)
>  	do {
>  		unsigned long next = pgd_addr_end(addr, end);
>  
> -		alloc_init_pud(pgd, addr, next, phys, type);
> +		alloc_init_pud(pgd, addr, next, phys, type, lowmem);
>  
>  		phys += next - addr;
>  		addr = next;
> @@ -767,6 +763,18 @@ static void __init create_mapping(struct map_desc *md)
>  }
>  
>  /*
> + * Create the page directory entries and any necessary
> + * page tables for the mapping specified by `md'.  We
> + * are able to cope here with varying sizes and address
> + * offsets, and we take full advantage of sections and
> + * supersections.
> + */
> +static void __init create_mapping(struct map_desc *md)
> +{
> +	__create_mapping(md, false);
> +}
> +
> +/*
>   * Create the architecture specific mappings
>   */
>  void __init iotable_init(struct map_desc *io_desc, int nr)
> @@ -1111,7 +1119,7 @@ static void __init map_lowmem(void)
>  		map.length = end - start;
>  		map.type = MT_MEMORY;
>  
> -		create_mapping(&map);
> +		__create_mapping(&map, true);
>  	}
>  }
>  
> @@ -1123,11 +1131,10 @@ void __init paging_init(struct machine_desc *mdesc)
>  {
>  	void *zero_page;
>  
> -	memblock_set_current_limit(arm_lowmem_limit);
> -
>  	build_mem_type_table();
>  	prepare_page_table();
>  	map_lowmem();
> +	memblock_set_current_limit(arm_lowmem_limit);
>  	dma_contiguous_remap();
>  	devicemaps_init(mdesc);
>  	kmap_init();
> -- 
> 1.7.9.5
> 
> 


* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-08 13:58 ` Kim, Jong-Sung
  2012-06-27 16:02   ` Dave Martin
@ 2012-06-27 19:18   ` Russell King - ARM Linux
  2012-06-28  6:08     ` Kim, Jong-Sung
  1 sibling, 1 reply; 14+ messages in thread
From: Russell King - ARM Linux @ 2012-06-27 19:18 UTC (permalink / raw)
  To: Kim, Jong-Sung
  Cc: 'Minchan Kim', 'Nicolas Pitre',
	'Catalin Marinas',
	linux-arm-kernel, linux-kernel, 'Chanho Min',
	linux-mm

On Fri, Jun 08, 2012 at 10:58:50PM +0900, Kim, Jong-Sung wrote:
> > From: Minchan Kim [mailto:minchan@kernel.org]
> > Sent: Tuesday, June 05, 2012 4:12 PM
> > 
> > If we do arm_memblock_steal with a page which is not aligned with section
> > size, panic can happen during boot by page fault in map_lowmem.
> > 
> > Detail:
> > 
> > 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
> >    which prefers tail pages of regions.
> > 2) map_lowmem maps 0x00000000 - 0x1fe00000
> > 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
> > 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
> > 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
> > 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!
> 
> May I suggest another simple approach? The first continuous couples of
> sections are always safely section-mapped inside alloc_init_section funtion.
> So, by limiting memblock_alloc to the end of the first continuous couples of
> sections at the start of map_lowmem, map_lowmem can safely memblock_alloc &
> memset even if we have one or more section-unaligned memory regions. The
> limit can be extended back to arm_lowmem_limit after the map_lowmem is done.

No.  What if the first block of memory is not large enough to handle all
the allocations?

I think the real problem is folk trying to reserve small amounts.  I have
said all reservations must be aligned to 1MB.
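
In callback form, that rule amounts to something like the sketch below (the
callback and the 4 KiB buffer are hypothetical and only for illustration;
arm_memblock_steal() and SECTION_SIZE are the real symbols discussed in this
thread):

/* Hypothetical machine ->reserve() callback illustrating the 1MB rule:
 * round the stolen size up to a full section so that the remaining lowmem
 * can still be section-mapped by map_lowmem(). */
static void __init example_mach_reserve(void)
{
	phys_addr_t phys;

	/* A bare 4 KiB steal could leave a section-unaligned tail and force
	 * map_lowmem() into pte-level mappings -- exactly the failure seen
	 * above.  So round up to a whole, section-aligned chunk instead: */
	phys = arm_memblock_steal(ALIGN(SZ_4K, SECTION_SIZE), SECTION_SIZE);

	/* phys is section-aligned memory, still unmapped at this stage, to
	 * be handed to whatever device needed it. */
	(void)phys;
}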


* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-27 16:12 ` Dave Martin
@ 2012-06-28  4:33   ` Nicolas Pitre
  2012-06-28  4:46     ` Minchan Kim
  2012-06-28  9:08     ` Russell King - ARM Linux
  0 siblings, 2 replies; 14+ messages in thread
From: Nicolas Pitre @ 2012-06-28  4:33 UTC (permalink / raw)
  To: Dave Martin
  Cc: Minchan Kim, Russell King, Catalin Marinas, Chanho Min,
	linux-kernel, linux-mm, Jongsung Kim, linux-arm-kernel

On Wed, 27 Jun 2012, Dave Martin wrote:

> On Tue, Jun 05, 2012 at 04:11:52PM +0900, Minchan Kim wrote:
> > If we do arm_memblock_steal with a page which is not aligned with section size,
> > panic can happen during boot by page fault in map_lowmem.
> > 
> > Detail:
> > 
> > 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
> >    which prefers tail pages of regions.
> > 2) map_lowmem maps 0x00000000 - 0x1fe00000
> > 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
> > 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
> > 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
> > 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!
> > 
> > This patch fix it by limiting memblock to mapped memory range.
> > 
> > Reported-by: Jongsung Kim <neidhard.kim@lge.com>
> > Suggested-by: Chanho Min <chanho.min@lge.com>
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > ---
> >  arch/arm/mm/mmu.c |   37 ++++++++++++++++++++++---------------
> >  1 file changed, 22 insertions(+), 15 deletions(-)
> > 
> > diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> > index e5dad60..a15aafe 100644
> > --- a/arch/arm/mm/mmu.c
> > +++ b/arch/arm/mm/mmu.c
> > @@ -594,7 +594,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
> >  
> >  static void __init alloc_init_section(pud_t *pud, unsigned long addr,
> >  				      unsigned long end, phys_addr_t phys,
> > -				      const struct mem_type *type)
> > +				      const struct mem_type *type, bool lowmem)
> >  {
> >  	pmd_t *pmd = pmd_offset(pud, addr);
> >  
> > @@ -619,6 +619,8 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
> >  
> >  		flush_pmd_entry(p);
> >  	} else {
> > +		if (lowmem)
> > +			memblock_set_current_limit(__pa(addr));
> 
> I thought of doing something similar to this.  A concern I have is that
> when mapping the first few sections, is it guaranteed that there will be
> enough memory available below memblock.current_limit to allocate extra
> page tables, in general?  I think we could get failures here even though
> there is spare (unmapped) memory above the limit.
> 
> An alternative approach would be to install temporary section mappings
> to cover all of lowmem before starting to create the real mappings.
> The memblock allocator still knows which regions are reserved, so we
> shouldn't end up scribbling on pages which are really not supposed to
> be mapped in lowmem.
> 
> That feels like it should work, but would involve extra overheads, more
> flushing etc. to purge all those temporary section mappings from the TLB.

This is all rather fragile and inelegant.

I propose the following two patches instead -- both patches are included
inline so as not to break the email thread.  What do you think?

---------- >8

From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 27 Jun 2012 23:02:31 -0400
Subject: [PATCH] ARM: head.S: simplify initial page table mapping

Let's map the initial RAM up to the end of the kernel .bss plus 64MB
instead of just the strict kernel image area.  This simplifies the code,
as the kernel image only needs to be handled specially in the XIP case.
It also gives the early memory allocator some room to work with before
the real mapping is finally installed with the actual amount of memory.

Signed-off-by: Nicolas Pitre <nico@linaro.org>

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 835898e7d7..cc3103fb66 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -55,14 +55,6 @@
 	add	\rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE
 	.endm
 
-#ifdef CONFIG_XIP_KERNEL
-#define KERNEL_START	XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
-#define KERNEL_END	_edata_loc
-#else
-#define KERNEL_START	KERNEL_RAM_VADDR
-#define KERNEL_END	_end
-#endif
-
 /*
  * Kernel startup entry point.
  * ---------------------------
@@ -218,51 +210,50 @@ __create_page_tables:
 	blo	1b
 
 	/*
-	 * Now setup the pagetables for our kernel direct
-	 * mapped region.
+	 * Map some RAM to cover the kernel image and its .bss section.
+	 * Push some additional 64MB to give the early memory allocator
+	 * some initial room (beware: real RAM might or might not be there
+	 * across the whole area). The real memory map will be established
+	 * (extended or shrunk) later.
 	 */
-	mov	r3, pc
-	mov	r3, r3, lsr #SECTION_SHIFT
-	orr	r3, r7, r3, lsl #SECTION_SHIFT
-	add	r0, r4,  #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
-	str	r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
-	ldr	r6, =(KERNEL_END - 1)
-	add	r0, r0, #1 << PMD_ORDER
+	add	r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
+	ldr	r6, =(_end + 64 * 1024 * 1024)
+	orr	r3, r8, r7
 	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
-1:	cmp	r0, r6
+1:	str	r3, [r0], #1 << PMD_ORDER
 	add	r3, r3, #1 << SECTION_SHIFT
-	strls	r3, [r0], #1 << PMD_ORDER
+	cmp	r0, r6
 	bls	1b
 
 #ifdef CONFIG_XIP_KERNEL
 	/*
-	 * Map some ram to cover our .data and .bss areas.
+	 * Map the kernel image separately as it is not located in RAM.
 	 */
-	add	r3, r8, #TEXT_OFFSET
-	orr	r3, r3, r7
-	add	r0, r4,  #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
-	str	r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]!
-	ldr	r6, =(_end - 1)
-	add	r0, r0, #4
+#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
+	mov	r3, pc
+	mov	r3, r3, lsr #SECTION_SHIFT
+	orr	r3, r7, r3, lsl #SECTION_SHIFT
+	add	r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
+	str	r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
+	ldr	r6, =(_edata_loc - 1)
+	add	r0, r0, #1 << PMD_ORDER
 	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
 1:	cmp	r0, r6
-	add	r3, r3, #1 << 20
-	strls	r3, [r0], #4
+	add	r3, r3, #1 << SECTION_SHIFT
+	strls	r3, [r0], #1 << PMD_ORDER
 	bls	1b
 #endif
 
 	/*
-	 * Then map boot params address in r2 or the first 1MB (2MB with LPAE)
-	 * of ram if boot params address is not specified.
+	 * Then map boot params address in r2 if specified.
 	 */
 	mov	r0, r2, lsr #SECTION_SHIFT
 	movs	r0, r0, lsl #SECTION_SHIFT
-	moveq	r0, r8
-	sub	r3, r0, r8
-	add	r3, r3, #PAGE_OFFSET
-	add	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
-	orr	r6, r7, r0
-	str	r6, [r3]
+	subne	r3, r0, r8
+	addne	r3, r3, #PAGE_OFFSET
+	addne	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
+	orrne	r6, r7, r0
+	strne	r6, [r3]
 
 #ifdef CONFIG_DEBUG_LL
 #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)

---------- >8

From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 27 Jun 2012 23:58:39 -0400
Subject: [PATCH] ARM: adjust the memblock limit according to the available memory mapping

Early on the only accessible memory comes from the initial mapping
performed in head.S, minus those page table entries cleared in
prepare_page_table().  Eventually the full lowmem is available once
map_lowmem() has mapped it.  Let's have this properly reflected in the
memblock allocator limit.

Signed-off-by: Nicolas Pitre <nico@linaro.org>

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60b55..7260e98dd4 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -932,7 +932,6 @@ void __init sanity_check_meminfo(void)
 #endif
 	meminfo.nr_banks = j;
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
-	memblock_set_current_limit(arm_lowmem_limit);
 }
 
 static inline void prepare_page_table(void)
@@ -967,6 +966,13 @@ static inline void prepare_page_table(void)
 	for (addr = __phys_to_virt(end);
 	     addr < VMALLOC_START; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
+
+	/*
+	 * The code in head.S has set a mapping up to _end + 64MB.
+	 * The code above has cleared any mapping from 'end' upwards.
+	 * Let's have this reflected in the available memory from memblock.
+	 */ 
+	memblock_set_current_limit(min(end, virt_to_phys(_end) + SZ_64M));
 }
 
 #ifdef CONFIG_ARM_LPAE
@@ -1113,6 +1119,8 @@ static void __init map_lowmem(void)
 
 		create_mapping(&map);
 	}
+
+	memblock_set_current_limit(arm_lowmem_limit);
 }
 
 /*
@@ -1123,8 +1131,6 @@ void __init paging_init(struct machine_desc *mdesc)
 {
 	void *zero_page;
 
-	memblock_set_current_limit(arm_lowmem_limit);
-
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();


* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-28  4:33   ` Nicolas Pitre
@ 2012-06-28  4:46     ` Minchan Kim
  2012-06-28  9:08     ` Russell King - ARM Linux
  1 sibling, 0 replies; 14+ messages in thread
From: Minchan Kim @ 2012-06-28  4:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-arm-kernel

Dear Jong-Sung,

Since I don't have the hardware, I can't test the patches below myself.
If you have time, could you please test them and send Nicolas some
feedback?  The thing is, posting an issue and then not following up on it
becomes a matter of trust. -_-;;

I'm not sure how Russell will respond, though -- whether he'll just keep
telling us to use 1MB alignment...

Thank you.

On 06/28/2012 01:33 PM, Nicolas Pitre wrote:

> On Wed, 27 Jun 2012, Dave Martin wrote:
> 
>> On Tue, Jun 05, 2012 at 04:11:52PM +0900, Minchan Kim wrote:
>>> If we do arm_memblock_steal with a page which is not aligned with section size,
>>> panic can happen during boot by page fault in map_lowmem.
>>>
>>> Detail:
>>>
>>> 1) mdesc->reserve can steal a page which is allocated at 0x1ffff000 by memblock
>>>    which prefers tail pages of regions.
>>> 2) map_lowmem maps 0x00000000 - 0x1fe00000
>>> 3) map_lowmem try to map 0x1fe00000 but it's not aligned by section due to 1.
>>> 4) calling alloc_init_pte allocates a new page for new pte by memblock_alloc
>>> 5) allocated memory for pte is 0x1fffe000 -> it's not mapped yet.
>>> 6) memset(ptr, 0, sz) in early_alloc_aligned got PANICed!
>>>
>>> This patch fix it by limiting memblock to mapped memory range.
>>>
>>> Reported-by: Jongsung Kim <neidhard.kim@lge.com>
>>> Suggested-by: Chanho Min <chanho.min@lge.com>
>>> Signed-off-by: Minchan Kim <minchan@kernel.org>
>>> ---
>>>  arch/arm/mm/mmu.c |   37 ++++++++++++++++++++++---------------
>>>  1 file changed, 22 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
>>> index e5dad60..a15aafe 100644
>>> --- a/arch/arm/mm/mmu.c
>>> +++ b/arch/arm/mm/mmu.c
>>> @@ -594,7 +594,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>>  
>>>  static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>>>  				      unsigned long end, phys_addr_t phys,
>>> -				      const struct mem_type *type)
>>> +				      const struct mem_type *type, bool lowmem)
>>>  {
>>>  	pmd_t *pmd = pmd_offset(pud, addr);
>>>  
>>> @@ -619,6 +619,8 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr,
>>>  
>>>  		flush_pmd_entry(p);
>>>  	} else {
>>> +		if (lowmem)
>>> +			memblock_set_current_limit(__pa(addr));
>>
>> I thought of doing something similar to this.  A concern I have is that
>> when mapping the first few sections, is it guaranteed that there will be
>> enough memory available below memblock.current_limit to allocate extra
>> page tables, in general?  I think we could get failures here even though
>> there is spare (unmapped) memory above the limit.
>>
>> An alternative approach would be to install temporary section mappings
>> to cover all of lowmem before starting to create the real mappings.
>> The memblock allocator still knows which regions are reserved, so we
>> shouldn't end up scribbling on pages which are really not supposed to
>> be mapped in lowmem.
>>
>> That feels like it should work, but would involve extra overheads, more
>> flushing etc. to purge all those temporary section mappings from the TLB.
> 
> This is all rather fragile and inelegant.
> 
> I propose the following two patches instead -- both patches are included 
> inline not to break the email thread.  What do you think?
> 
> ---------- >8
> 
> From: Nicolas Pitre <nicolas.pitre@linaro.org>
> Date: Wed, 27 Jun 2012 23:02:31 -0400
> Subject: [PATCH] ARM: head.S: simplify initial page table mapping
> 
> Let's map the initial RAM up to the end of the kernel.bss plus 64MB
> instead of the strict kernel image area.  This simplifies the code
> as the kernel image only needs to be handled specially in the XIP case.
> This also give some room for the early memory allocator to use before
> the real mapping is finally installed with the actual amount of memory.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> 
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 835898e7d7..cc3103fb66 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -55,14 +55,6 @@
>  	add	\rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE
>  	.endm
>  
> -#ifdef CONFIG_XIP_KERNEL
> -#define KERNEL_START	XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
> -#define KERNEL_END	_edata_loc
> -#else
> -#define KERNEL_START	KERNEL_RAM_VADDR
> -#define KERNEL_END	_end
> -#endif
> -
>  /*
>   * Kernel startup entry point.
>   * ---------------------------
> @@ -218,51 +210,50 @@ __create_page_tables:
>  	blo	1b
>  
>  	/*
> -	 * Now setup the pagetables for our kernel direct
> -	 * mapped region.
> +	 * Map some RAM to cover the kernel image and its .bss section.
> +	 * Push some additional 64MB to give the early memory allocator
> +	 * some initial room (beware: real RAM might or might not be there
> +	 * across the whole area). The real memory map will be established
> +	 * (extended or shrunk) later.
>  	 */
> -	mov	r3, pc
> -	mov	r3, r3, lsr #SECTION_SHIFT
> -	orr	r3, r7, r3, lsl #SECTION_SHIFT
> -	add	r0, r4,  #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
> -	str	r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
> -	ldr	r6, =(KERNEL_END - 1)
> -	add	r0, r0, #1 << PMD_ORDER
> +	add	r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
> +	ldr	r6, =(_end + 64 * 1024 * 1024)
> +	orr	r3, r8, r7
>  	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
> -1:	cmp	r0, r6
> +1:	str	r3, [r0], #1 << PMD_ORDER
>  	add	r3, r3, #1 << SECTION_SHIFT
> -	strls	r3, [r0], #1 << PMD_ORDER
> +	cmp	r0, r6
>  	bls	1b
>  
>  #ifdef CONFIG_XIP_KERNEL
>  	/*
> -	 * Map some ram to cover our .data and .bss areas.
> +	 * Map the kernel image separately as it is not located in RAM.
>  	 */
> -	add	r3, r8, #TEXT_OFFSET
> -	orr	r3, r3, r7
> -	add	r0, r4,  #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
> -	str	r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]!
> -	ldr	r6, =(_end - 1)
> -	add	r0, r0, #4
> +#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
> +	mov	r3, pc
> +	mov	r3, r3, lsr #SECTION_SHIFT
> +	orr	r3, r7, r3, lsl #SECTION_SHIFT
> +	add	r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
> +	str	r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
> +	ldr	r6, =(_edata_loc - 1)
> +	add	r0, r0, #1 << PMD_ORDER
>  	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
>  1:	cmp	r0, r6
> -	add	r3, r3, #1 << 20
> -	strls	r3, [r0], #4
> +	add	r3, r3, #1 << SECTION_SHIFT
> +	strls	r3, [r0], #1 << PMD_ORDER
>  	bls	1b
>  #endif
>  
>  	/*
> -	 * Then map boot params address in r2 or the first 1MB (2MB with LPAE)
> -	 * of ram if boot params address is not specified.
> +	 * Then map boot params address in r2 if specified.
>  	 */
>  	mov	r0, r2, lsr #SECTION_SHIFT
>  	movs	r0, r0, lsl #SECTION_SHIFT
> -	moveq	r0, r8
> -	sub	r3, r0, r8
> -	add	r3, r3, #PAGE_OFFSET
> -	add	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
> -	orr	r6, r7, r0
> -	str	r6, [r3]
> +	subne	r3, r0, r8
> +	addne	r3, r3, #PAGE_OFFSET
> +	addne	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
> +	orrne	r6, r7, r0
> +	strne	r6, [r3]
>  
>  #ifdef CONFIG_DEBUG_LL
>  #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
> 
> ---------- >8
> 
> From: Nicolas Pitre <nicolas.pitre@linaro.org>
> Date: Wed, 27 Jun 2012 23:58:39 -0400
> Subject: [PATCH] ARM: adjust the memblock limit according to the available memory mapping
> 
> Early on the only accessible memory comes from the initial mapping
> performed in head.S, minus those page table entries cleared in
> prepare_page_table().  Eventually the full lowmem is available once
> map_lowmem() has mapped it.  Let's have this properly reflected in the
> memblock allocator limit.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> 
> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index e5dad60b55..7260e98dd4 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -932,7 +932,6 @@ void __init sanity_check_meminfo(void)
>  #endif
>  	meminfo.nr_banks = j;
>  	high_memory = __va(arm_lowmem_limit - 1) + 1;
> -	memblock_set_current_limit(arm_lowmem_limit);
>  }
>  
>  static inline void prepare_page_table(void)
> @@ -967,6 +966,13 @@ static inline void prepare_page_table(void)
>  	for (addr = __phys_to_virt(end);
>  	     addr < VMALLOC_START; addr += PMD_SIZE)
>  		pmd_clear(pmd_off_k(addr));
> +
> +	/*
> +	 * The code in head.S has set a mapping up to _end + 64MB.
> +	 * The code above has cleared any mapping from 'end' upwards.
> +	 * Let's have this reflected in the available memory from memblock.
> +	 */ 
> +	memblock_set_current_limit(min(end, virt_to_phys(_end) + SZ_64M));
>  }
>  
>  #ifdef CONFIG_ARM_LPAE
> @@ -1113,6 +1119,8 @@ static void __init map_lowmem(void)
>  
>  		create_mapping(&map);
>  	}
> +
> +	memblock_set_current_limit(arm_lowmem_limit);
>  }
>  
>  /*
> @@ -1123,8 +1131,6 @@ void __init paging_init(struct machine_desc *mdesc)
>  {
>  	void *zero_page;
>  
> -	memblock_set_current_limit(arm_lowmem_limit);
> -
>  	build_mem_type_table();
>  	prepare_page_table();
>  	map_lowmem();
> 
> 



-- 
Kind regards,
Minchan Kim



* RE: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-27 16:02   ` Dave Martin
@ 2012-06-28  5:43     ` Kim, Jong-Sung
  2012-06-28  6:25       ` Nicolas Pitre
  0 siblings, 1 reply; 14+ messages in thread
From: Kim, Jong-Sung @ 2012-06-28  5:43 UTC (permalink / raw)
  To: 'Dave Martin'
  Cc: 'Minchan Kim', 'Russell King',
	'Nicolas Pitre', 'Catalin Marinas',
	'Chanho Min',
	linux-kernel, linux-mm, linux-arm-kernel

> From: Dave Martin [mailto:dave.martin@linaro.org]
> Sent: Thursday, June 28, 2012 1:02 AM
> 
> For me, it appears that this block just contains the initial region passed
> in ATAG_MEM or on the command line, with some reservations for
> swapper_pg_dir, the kernel text/data, device tree and initramfs.
> 
> So far as I can tell, the only memory guaranteed to be mapped here is the
> kernel image: there may be no guarantee that there is any unused space in
> this region which could be used to allocate extra page tables.
> The rest appears during the execution of map_lowmem().
> 
> Cheers
> ---Dave

Thank you for your comment, Dave! It was not that sophisticated a choice, but
I thought a normal embedded system trying to reduce the BOM would have a
big-enough first memblock memory region. However, you're right: there can be
exceptional systems. Then, what do you think about the following approach:

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60..0bc5316 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1094,6 +1094,16 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
        struct memblock_region *reg;
+       phys_addr_t pmd_map_end = 0;
+
+       for_each_memblock(memory, reg) {
+               pmd_map_end = reg->base + reg->size;
+               if ((reg->base | reg->size) & ~PMD_MASK)
+                       break;
+       }
+       if (pmd_map_end > arm_lowmem_limit)
+               pmd_map_end = arm_lowmem_limit;
+       memblock_set_current_limit(pmd_map_end & PMD_MASK);
 
        /* Map all the lowmem memory banks. */
        for_each_memblock(memory, reg) {
@@ -1113,6 +1123,8 @@ static void __init map_lowmem(void)
 
                create_mapping(&map);
        }
+
+       memblock_set_current_limit(arm_lowmem_limit);
 }
 
 /*
@@ -1123,8 +1135,6 @@ void __init paging_init(struct machine_desc *mdesc)
 {
        void *zero_page;
 
-       memblock_set_current_limit(arm_lowmem_limit);
-
        build_mem_type_table();
        prepare_page_table();
        map_lowmem();

This way, the PTE allocations are no longer limited to the first bank.





* RE: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-27 19:18   ` Russell King - ARM Linux
@ 2012-06-28  6:08     ` Kim, Jong-Sung
  0 siblings, 0 replies; 14+ messages in thread
From: Kim, Jong-Sung @ 2012-06-28  6:08 UTC (permalink / raw)
  To: 'Russell King - ARM Linux'
  Cc: 'Minchan Kim', 'Nicolas Pitre',
	'Catalin Marinas',
	linux-arm-kernel, linux-kernel, 'Chanho Min',
	linux-mm

> From: Russell King - ARM Linux [mailto:linux@arm.linux.org.uk]
> Sent: Thursday, June 28, 2012 4:18 AM
> On Fri, Jun 08, 2012 at 10:58:50PM +0900, Kim, Jong-Sung wrote:
> >
> > May I suggest another simple approach? The first continuous couples of
> > sections are always safely section-mapped inside alloc_init_section
> funtion.
> > So, by limiting memblock_alloc to the end of the first continuous
> > couples of sections at the start of map_lowmem, map_lowmem can safely
> > memblock_alloc & memset even if we have one or more section-unaligned
> > memory regions. The limit can be extended back to arm_lowmem_limit after
> the map_lowmem is done.
> 
> No.  What if the first block of memory is not large enough to handle all the
> allocations?
> 
Thank you for your comment, Russell. I sent a modified patch, as a reply to
Dave's message, that no longer limits the allocations to the first memblock
memory region.

> I think the real problem is folk trying to reserve small amounts.  I have
> said all reservations must be aligned to 1MB.
>
OK, now I understand your position on arm_memblock_steal(). Then, how about
adding a simple size alignment to prevent the possible problem, like this:

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index f54d592..d0daf0d 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -324,6 +324,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
 
        BUG_ON(!arm_memblock_steal_permitted);
 
+       size = ALIGN(size, SECTION_SIZE);
+
        phys = memblock_alloc(size, align);
        memblock_free(phys, size);
        memblock_remove(phys, size);

Or, alternatively, kindly leaving a few comments about the restriction?





* RE: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-28  5:43     ` Kim, Jong-Sung
@ 2012-06-28  6:25       ` Nicolas Pitre
  2012-06-28  6:54         ` Kim, Jong-Sung
  0 siblings, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2012-06-28  6:25 UTC (permalink / raw)
  To: Kim, Jong-Sung
  Cc: 'Dave Martin', 'Minchan Kim',
	'Russell King', 'Catalin Marinas',
	'Chanho Min',
	linux-kernel, linux-mm, linux-arm-kernel

On Thu, 28 Jun 2012, Kim, Jong-Sung wrote:

> > From: Dave Martin [mailto:dave.martin@linaro.org]
> > Sent: Thursday, June 28, 2012 1:02 AM
> > 
> > For me, it appears that this block just contains the initial region passed
> > in ATAG_MEM or on the command line, with some reservations for
> > swapper_pg_dir, the kernel text/data, device tree and initramfs.
> > 
> > So far as I can tell, the only memory guaranteed to be mapped here is the
> > kernel image: there may be no guarantee that there is any unused space in
> > this region which could be used to allocate extra page tables.
> > The rest appears during the execution of map_lowmem().
> > 
> > Cheers
> > ---Dave
> 
> Thank you for your comment, Dave! It was not that sophisticated choice, but
> I thought that normal embedded system trying to reduce the BOM would have a
> big-enough first memblock memory region. However you're right. There can be
> exceptional systems. Then, how do you think about following manner:
[...]

This still has some possibilities for failure.

Please have a look at the two patches I've posted to fix this in a 
better way.


Nicolas


* RE: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-28  6:25       ` Nicolas Pitre
@ 2012-06-28  6:54         ` Kim, Jong-Sung
  0 siblings, 0 replies; 14+ messages in thread
From: Kim, Jong-Sung @ 2012-06-28  6:54 UTC (permalink / raw)
  To: 'Nicolas Pitre'
  Cc: 'Dave Martin', 'Minchan Kim',
	'Russell King', 'Catalin Marinas',
	'Chanho Min',
	linux-kernel, linux-mm, linux-arm-kernel

> From: Nicolas Pitre [mailto:nicolas.pitre@linaro.org]
> Sent: Thursday, June 28, 2012 3:26 PM
> 
> On Thu, 28 Jun 2012, Kim, Jong-Sung wrote:
> 
> > Thank you for your comment, Dave! It was not that sophisticated
> > choice, but I thought that normal embedded system trying to reduce the
> > BOM would have a big-enough first memblock memory region. However
> > you're right. There can be exceptional systems. Then, how do you think
> > about following manner:
> [...]
> 
> This still has some possibilities for failure.
> 
Can you kindly describe the possible failure path?

> Please have a look at the two patches I've posted to fix this in a better
> way.
> 
I'm setting up to test your elegant patches. ;-)





* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-28  4:33   ` Nicolas Pitre
  2012-06-28  4:46     ` Minchan Kim
@ 2012-06-28  9:08     ` Russell King - ARM Linux
  2012-06-28 17:50       ` Nicolas Pitre
  1 sibling, 1 reply; 14+ messages in thread
From: Russell King - ARM Linux @ 2012-06-28  9:08 UTC (permalink / raw)
  To: Nicolas Pitre
  Cc: Dave Martin, Minchan Kim, Catalin Marinas, Chanho Min,
	linux-kernel, linux-mm, Jongsung Kim, linux-arm-kernel

On Thu, Jun 28, 2012 at 12:33:02AM -0400, Nicolas Pitre wrote:
> I propose the following two patches instead -- both patches are included 
> inline not to break the email thread.  What do you think?
> 
> ---------- >8
> 
> From: Nicolas Pitre <nicolas.pitre@linaro.org>
> Date: Wed, 27 Jun 2012 23:02:31 -0400
> Subject: [PATCH] ARM: head.S: simplify initial page table mapping
> 
> Let's map the initial RAM up to the end of the kernel.bss plus 64MB
> instead of the strict kernel image area.  This simplifies the code
> as the kernel image only needs to be handled specially in the XIP case.
> This also give some room for the early memory allocator to use before
> the real mapping is finally installed with the actual amount of memory.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>

Why is this needed?  The initial allocation is sufficient, and you really
should not be wanting to _allocate_ memory in your ->reserve method and
have it be _usable_ at that point.

> Early on the only accessible memory comes from the initial mapping
> performed in head.S, minus those page table entries cleared in
> prepare_page_table().  Eventually the full lowmem is available once
> map_lowmem() has mapped it.  Let's have this properly reflected in the
> memblock allocator limit.

Err, I don't think you understand what's going on here.

The sequence is:

1. setup the initial mappings so we can run the kernel in virtual space.
2. provide the memory areas to memblock
3. ask the platform to reserve whatever memory it wants from memblock
   [this means using memblock_reserve or arm_memblock_steal).  The
   reserved memory is *not* expected to be mapped at this point, and is
   therefore inaccessible.
4. Setup the lowmem mappings.

And when we're setting up the lowmem mappings, we do *not* expect to
create any non-section page mappings, which again means we have no reason
to use the memblock allocator to obtain memory that we want to immediately
use.
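
Condensed into code form, that ordering is roughly (heavily abridged sketch,
not the actual code; the real flow lives in setup_arch() in
arch/arm/kernel/setup.c and in arch/arm/mm/):

static void __init boot_memory_outline(struct meminfo *mi,
				       struct machine_desc *mdesc)
{
	/* 1. head.S has already installed the initial mappings, so the
	 *    kernel is running in virtual space at this point. */

	/* 2 + 3. Hand the memory areas to memblock; mdesc->reserve() runs
	 *    in here and may memblock_reserve()/arm_memblock_steal().
	 *    Anything it reserves is *not* mapped or accessible yet. */
	arm_memblock_init(mi, mdesc);

	/* 4. Build the lowmem mappings (map_lowmem() is called from here);
	 *    only section mappings are expected for lowmem. */
	paging_init(mdesc);
}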

So I don't know where your claim of this being "fragile" is coming from.

What is fragile is people wanting to use arm_memblock_steal() without
following the rules I laid down for it.


* Re: [PATCH] [RESEND] arm: limit memblock base address for early_pte_alloc
  2012-06-28  9:08     ` Russell King - ARM Linux
@ 2012-06-28 17:50       ` Nicolas Pitre
  0 siblings, 0 replies; 14+ messages in thread
From: Nicolas Pitre @ 2012-06-28 17:50 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Dave Martin, Minchan Kim, Catalin Marinas, Chanho Min,
	linux-kernel, linux-mm, Jongsung Kim, linux-arm-kernel

On Thu, 28 Jun 2012, Russell King - ARM Linux wrote:

> Err, I don't think you understand what's going on here.
> 
> The sequence is:
> 
> 1. setup the initial mappings so we can run the kernel in virtual space.
> 2. provide the memory areas to memblock
> 3. ask the platform to reserve whatever memory it wants from memblock
>    [this means using memblock_reserve or arm_memblock_steal).  The
>    reserved memory is *not* expected to be mapped at this point, and is
>    therefore inaccessible.
> 4. Setup the lowmem mappings.

I do understand that pretty well so far.

> And when we're setting up the lowmem mappings, we do *not* expect to
> create any non-section page mappings, which again means we have no reason
> to use the memblock allocator to obtain memory that we want to immediately
> use.

And why does this have to remain so?

> So I don't know where you're claim of being "fragile" is coming from.

It doesn't come from anything you've described so far. It comes from 
those previous attempts at lifting this limitation.  I think that my 
proposal is much less fragile than the other ones.

> What is fragile is people wanting to use arm_memblock_steal() without
> following the rules for it I layed down.

What about enhancing your rules if the technical limitations they were 
based on are lifted?


Nicolas
