linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* IA64 non-contiguous memory space bugs
@ 2006-02-22  0:13 David Gibson
  2006-02-22  0:39 ` David S. Miller
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: David Gibson @ 2006-02-22  0:13 UTC (permalink / raw)
  To: linux-ia64, linux-kernel

Quite some time ago, I found (by inspection) an ia64 specific bug
related to its non-contiguous user address space.  I've never done
anything about it, because I don't have an ia64 to test on - but
somebody should fix it.  Recently I've spotted another possible bug,
also by inspection - I don't know enough about ia64 to tell if it's a
real problem or not.

First bug (confirmed many months ago by Chris Wedgwood) - you can get
weird effects if you attempt to mmap() something into one of the
address space gaps.  The ia64 outer wrapper for mmap2() tries to
prevent it, but doesn't do a good enough job, it's still possible
indirectly with shmat() and maybe mremap().  Basic trouble is that
most of the checks applied by the generic code assume that everything
between 0 and TASK_SIZE is valid.  Patch below addresses this, or did
- quite likely it's bitrotted since I wrote it.

Second problem is in the hugepage logic in free_pgtables()
(mm/memory.c).  As far as I can tell it's complete crap, and only
works by accident, for different accidental reasons on ppc64 and ia64,
the only archs that have a non-trivial is_hugepage_only_range().
Except that I'm not sure it does entirely work by accident on ia64:
suppose a process has a hugepage mapping that begins some way after
the beginning of the hugepage address range.  Before
hugetlb_free_pgd_range() gets called on that area, it will be called
on the next normal page VMA down - but with an end address at the
beginning of the hugepage VMA and so extending into the hugepage
address range.  I don't really understand the ia64 pagetable mapping
stuff well enough to tell if that's dangerous or not.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Index: working-2.6/mm/mremap.c
===================================================================
--- working-2.6.orig/mm/mremap.c	2004-08-30 10:23:00.000000000 +1000
+++ working-2.6/mm/mremap.c	2004-09-24 15:02:18.169776624 +1000
@@ -274,7 +274,7 @@
 		if (!(flags & MREMAP_MAYMOVE))
 			goto out;
 
-		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
+		if (! GOOD_TASK_VM_RANGE(new_addr, new_len))
 			goto out;
 
 		/* Check if the location we're moving into overlaps the
@@ -351,6 +351,8 @@
 	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
 	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
 		unsigned long max_addr = TASK_SIZE;
+		if (max_addr > REGION_MAX(vma->vm_start))
+			max_addr = REGION_MAX(vma->vm_start);
 		if (vma->vm_next)
 			max_addr = vma->vm_next->vm_start;
 		/* can we just expand the current mapping? */
Index: working-2.6/mm/mmap.c
===================================================================
--- working-2.6.orig/mm/mmap.c	2004-09-07 10:38:00.000000000 +1000
+++ working-2.6/mm/mmap.c	2004-09-24 15:02:18.170776472 +1000
@@ -1209,7 +1209,7 @@
 	if (flags & MAP_FIXED) {
 		unsigned long ret;
 
-		if (addr > TASK_SIZE - len)
+		if (! GOOD_TASK_VM_RANGE(addr, len))
 			return -ENOMEM;
 		if (addr & ~PAGE_MASK)
 			return -EINVAL;
@@ -1658,7 +1658,7 @@
 	unsigned long end;
 	struct vm_area_struct *mpnt, *prev, *last;
 
-	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+	if ((start & ~PAGE_MASK) || !GOOD_TASK_VM_RANGE(start, len))
 		return -EINVAL;
 
 	if ((len = PAGE_ALIGN(len)) == 0)
@@ -1749,7 +1749,7 @@
 	if (!len)
 		return addr;
 
-	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
+	if (! GOOD_TASK_VM_RANGE(addr, len))
 		return -EINVAL;
 
 	/*
Index: working-2.6/include/asm-ia64/page.h
===================================================================
--- working-2.6.orig/include/asm-ia64/page.h	2004-09-10 13:26:27.000000000 +1000
+++ working-2.6/include/asm-ia64/page.h	2004-09-24 15:02:18.171776320 +1000
@@ -123,6 +123,8 @@
 #define REGION_SIZE		REGION_NUMBER(1)
 #define REGION_KERNEL		7
 
+#define REGION_MAX(x)	((REGION_NUMBER(x)<<61) + RGN_MAP_LIMIT)
+
 #ifdef CONFIG_HUGETLB_PAGE
 # define htlbpage_to_page(x)	((REGION_NUMBER(x) << 61)				\
 				 | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
Index: working-2.6/include/linux/mm.h
===================================================================
--- working-2.6.orig/include/linux/mm.h	2004-09-07 10:38:00.000000000 +1000
+++ working-2.6/include/linux/mm.h	2004-09-24 15:02:18.172776168 +1000
@@ -41,6 +41,13 @@
 #define MM_VM_SIZE(mm)	TASK_SIZE
 #endif
 
+#ifndef REGION_MAX
+#define REGION_MAX(addr)	TASK_SIZE
+#endif
+
+#define GOOD_TASK_VM_RANGE(addr, len) \
+	( ((addr)+(len) >= (addr)) || ((addr)+(len) <= TASK_SIZE) \
+	  || ((addr)+(len) <= REGION_MAX(addr)) )
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way
Index: working-2.6/fs/binfmt_elf.c
===================================================================
--- working-2.6.orig/fs/binfmt_elf.c	2004-09-24 10:14:10.000000000 +1000
+++ working-2.6/fs/binfmt_elf.c	2004-09-24 15:02:18.174775864 +1000
@@ -81,7 +81,8 @@
 		.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x)	((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x)	(((unsigned long)(x) > TASK_SIZE) \
+		 || ((unsigned long)(x) >= REGION_MAX((unsigned long)x)) )
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -370,8 +371,8 @@
 	     * <= p_memsize so it is only necessary to check p_memsz.
 	     */
 	    k = load_addr + eppnt->p_vaddr;
-	    if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
-		eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
+	    if ((eppnt->p_filesz > eppnt->p_memsz) ||
+		! GOOD_TASK_VM_RANGE(k, eppnt->p_memsz)) {
 	        error = -ENOMEM;
 		goto out_close;
 	    }
@@ -798,9 +799,8 @@
 		 * allowed task size. Note that p_filesz must always be
 		 * <= p_memsz so it is only necessary to check p_memsz.
 		 */
-		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
-		    elf_ppnt->p_memsz > TASK_SIZE ||
-		    TASK_SIZE - elf_ppnt->p_memsz < k) {
+		if (elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+		    ! GOOD_TASK_VM_RANGE(k, elf_ppnt->p_memsz)) {
 			/* set_brk can never work.  Avoid overflows.  */
 			send_sig(SIGKILL, current, 0);
 			goto out_free_dentry;
Index: working-2.6/arch/ia64/kernel/sys_ia64.c
===================================================================
--- working-2.6.orig/arch/ia64/kernel/sys_ia64.c	2004-08-09 09:51:26.000000000 +1000
+++ working-2.6/arch/ia64/kernel/sys_ia64.c	2004-09-24 15:02:18.175775712 +1000
@@ -211,17 +211,6 @@
 		goto out;
 	}
 
-	/*
-	 * Don't permit mappings into unmapped space, the virtual page table of a region,
-	 * or across a region boundary.  Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
-	 * (for some integer n <= 61) and len > 0.
-	 */
-	roff = REGION_OFFSET(addr);
-	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
-		addr = -EINVAL;
-		goto out;
-	}
-
 	down_write(&current->mm->mmap_sem);
 	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&current->mm->mmap_sem);


----- End forwarded message -----

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 20+ messages in thread
* RE: IA64 non-contiguous memory space bugs
@ 2006-02-22  3:01 Zhang, Yanmin
  0 siblings, 0 replies; 20+ messages in thread
From: Zhang, Yanmin @ 2006-02-22  3:01 UTC (permalink / raw)
  To: David Gibson, Chen, Kenneth W; +Cc: linux-ia64, linux-kernel

>>-----Original Message-----
>>From: linux-ia64-owner@vger.kernel.org [mailto:linux-ia64-owner@vger.kernel.org] On Behalf Of 'David Gibson'
>>Sent: 2006年2月22日 10:26
>>To: Chen, Kenneth W
>>Cc: linux-ia64@vger.kernel.org; linux-kernel@vger.kernel.org
>>Subject: Re: IA64 non-contiguous memory space bugs
>>
>>Your patch below is insufficient, because there's a second test of
>>is_hugepage_only_range() further down.  However, instead of tweaking
>>the tested ranges, I think what we really want to do is check for
>>is_vm_hugetlb_page() instead.
>>
>>I was worried before, but now that you point out it's the 'end'
>>address which really matters, not the ceiling, that might be
>>sufficient.  Um.. except that hugepages, unlike normal pages these
>>days don't necessarily clean up all possible pagetable pages on
>>unmap...  crud.  Still the patch below ought to be an improvement.
>>
>>Index: working-2.6/mm/memory.c
>>===================================================================
>>--- working-2.6.orig/mm/memory.c	2006-02-22 10:42:14.000000000 +1100
>>+++ working-2.6/mm/memory.c	2006-02-22 13:22:07.000000000 +1100
>>@@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **t
>> 		anon_vma_unlink(vma);
>> 		unlink_file_vma(vma);
>>
>>-		if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
>>+		if (is_vm_hugetlb_page(vma)) {
>> 			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
>> 				floor, next? next->vm_start: ceiling);
>> 		} else {
>>@@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **t
>> 			 * Optimization: gather nearby vmas into one call down
>> 			 */
>> 			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
>>-			  && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
>>-							HPAGE_SIZE)) {
>>+			       && !is_vm_hugetlb_page(vma->vm_mm)) {
is_vm_hugetlb_page(vma->vm_mm) should be is_vm_hugetlb_page(vma)?


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2006-02-24  2:47 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-22  0:13 IA64 non-contiguous memory space bugs David Gibson
2006-02-22  0:39 ` David S. Miller
2006-02-22  2:17   ` David Gibson
2006-02-22  1:31 ` Chen, Kenneth W
2006-02-22  1:51   ` Chen, Kenneth W
2006-02-22  2:25     ` 'David Gibson'
2006-02-22  2:45       ` Chen, Kenneth W
2006-02-22 16:35       ` Hugh Dickins
2006-02-22 23:49         ` 'David Gibson'
2006-02-23 20:13           ` Hugh Dickins
2006-02-24  0:11             ` 'David Gibson'
2006-02-24  1:14               ` Chen, Kenneth W
2006-02-24  2:46                 ` 'David Gibson'
2006-02-22  2:15   ` 'David Gibson'
2006-02-22  2:53 ` Chen, Kenneth W
2006-02-22  3:55   ` David Mosberger-Tang
2006-02-22  4:02     ` David Gibson
2006-02-22 16:19 ` Chen, Kenneth W
2006-02-22 23:26   ` 'David Gibson'
2006-02-22  3:01 Zhang, Yanmin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).