From: Rik van Riel <riel@surriel.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, aarcange@redhat.com,
	peterz@infradead.org, minchan@gmail.com,
	kosaki.motohiro@gmail.com, andi@firstfloor.org,
	hannes@cmpxchg.org, mel@csn.ul.ie, linux-kernel@vger.kernel.org,
	Rik van Riel <riel@surriel.com>, Rik van Riel <riel@redhat.com>
Subject: [PATCH -mm v2 05/11] mm: get unmapped area from VMA tree
Date: Thu, 21 Jun 2012 17:57:09 -0400
Message-ID: <1340315835-28571-6-git-send-email-riel@surriel.com>
In-Reply-To: <1340315835-28571-1-git-send-email-riel@surriel.com>

Change the generic implementations of arch_get_unmapped_area(_topdown)
to use the free space information cached in the VMA rbtree. This makes
it possible to find free address space in O(log N) time.
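
To illustrate the mechanism (a stand-alone sketch, not the kernel
code; the node layout and helper names are invented for the example),
each node caches the largest free gap anywhere in its subtree, so an
entire subtree can be ruled out with a single comparison:

	struct node {
		unsigned long vm_start, vm_end;	/* this VMA's range */
		unsigned long own_gap;		/* gap just below vm_start */
		unsigned long free_gap;		/* largest gap in subtree */
		struct node *left, *right;
	};

	static unsigned long subtree_gap(struct node *n)
	{
		return n ? n->free_gap : 0;
	}

	/* Recompute the cached value when this node or a child changes. */
	static void update_free_gap(struct node *n)
	{
		unsigned long gap = n->own_gap;

		if (subtree_gap(n->left) > gap)
			gap = subtree_gap(n->left);
		if (subtree_gap(n->right) > gap)
			gap = subtree_gap(n->right);
		n->free_gap = gap;
	}

Rebalancing touches only O(log N) nodes, so keeping these values up
to date does not change the complexity of insert or erase.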

For bottom-up allocations, we pick the lowest hole that is large
enough for our allocation. For topdown allocations, we pick the
highest hole of sufficient size.
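
Using the same toy node layout as the sketch above, the bottom-up
case reduces to the following descent (illustrative only; the real
code below is iterative, respects TASK_UNMAPPED_BASE, and the
topdown variant mirrors the left/right preference):

	/* Start of the lowest gap >= len, or 0 if there is none. */
	static unsigned long lowest_gap(struct node *n, unsigned long len)
	{
		unsigned long addr;

		if (!n || subtree_gap(n) < len)
			return 0;	/* nothing big enough in this subtree */

		/* Lower addresses live in the left subtree: try it first. */
		addr = lowest_gap(n->left, len);
		if (addr)
			return addr;

		/* Then the gap immediately below this VMA. */
		if (n->own_gap >= len)
			return n->vm_start - n->own_gap;

		/* Otherwise the gap must be in the right subtree. */
		return lowest_gap(n->right, len);
	}

Each level either succeeds immediately or discards a whole subtree
in O(1), so the walk is O(log N).  (Returning 0 for "no gap" is a
shortcut this toy version can afford; the real code returns -ENOMEM.)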

For topdown allocations, we need to keep track of the highest
mapped VMA address, because it might be below mm->mmap_base,
and we only keep track of free space to the left of each VMA
in the VMA tree.  It is tempting to also keep track of
the free space to the right of each VMA when running in
topdown mode, but that gets us into trouble on x86, where
a process can switch mmap direction in the middle of
execve.
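
With mm->highest_vm_end available, the topdown search can start with
a single O(1) check before falling back to the tree walk, as the
patch below does:

	/*
	 * The tree only records gaps to the left of each VMA, so the
	 * space between the highest VMA and mm->mmap_base is invisible
	 * to the tree walk and has to be checked explicitly.
	 */
	if (upper_limit - len > mm->highest_vm_end) {
		addr = upper_limit - len;
		goto found_addr;
	}

where upper_limit is mm->mmap_base and len has already been checked
against it to prevent underflow.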

We have to leave mm->free_area_cache and mm->cached_hole_size
in place for now, because the architecture specific versions still
use them.  Once all the architectures have been converted over to
the new code, we can remove these variables from mm_struct.

Signed-off-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm_types.h |    1 +
 mm/mmap.c                |  241 +++++++++++++++++++++++++++++-----------------
 2 files changed, 155 insertions(+), 87 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 23bd1e2..6102c27 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -312,6 +312,7 @@ struct mm_struct {
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
 	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
+	unsigned long highest_vm_end;		/* highest vma end address */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
diff --git a/mm/mmap.c b/mm/mmap.c
index 95f66c5..e501b4f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -4,6 +4,7 @@
  * Written by obz.
  *
  * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
+ * Rbtree get_unmapped_area Copyright (C) 2012  Rik van Riel
  */
 
 #include <linux/slab.h>
@@ -259,6 +260,17 @@ static void adjust_free_gap(struct vm_area_struct *vma)
 	rb_augment_erase_end(&vma->vm_rb, vma_rb_augment_cb, NULL);
 }
 
+static unsigned long node_free_gap(struct rb_node *node)
+{
+	struct vm_area_struct *vma;
+
+	if (!node)
+		return 0;
+
+	vma = rb_to_vma(node);
+	return vma->free_gap;
+}
+
 /*
  * Unlink a file-based vm structure from its prio_tree, to hide
  * vma from rmap and vmtruncate before freeing its page tables.
@@ -395,12 +407,16 @@ void validate_mm(struct mm_struct *mm)
 	int bug = 0;
 	int i = 0;
 	struct vm_area_struct *tmp = mm->mmap;
+	unsigned long highest_address = 0;
 	while (tmp) {
 		if (tmp->free_gap != largest_free_gap(&tmp->vm_rb))
 			printk("free space %lx, correct %lx\n", tmp->free_gap, largest_free_gap(&tmp->vm_rb)), bug = 1;
+		highest_address = tmp->vm_end;
 		tmp = tmp->vm_next;
 		i++;
 	}
+	if (highest_address != mm->highest_vm_end)
+		printk("mm->highest_vm_end %lx, found %lx\n", mm->highest_vm_end, highest_address), bug = 1;
 	if (i != mm->map_count)
 		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
 	i = browse_rb(&mm->mm_rb);
@@ -458,6 +474,9 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Propagate the new free gap between next and us up the tree. */
 	if (vma->vm_next)
 		adjust_free_gap(vma->vm_next);
+	else
+		/* This is the VMA with the highest address. */
+		mm->highest_vm_end = vma->vm_end;
 }
 
 static void __vma_link_file(struct vm_area_struct *vma)
@@ -661,6 +680,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 		end_changed = true;
 	}
 	vma->vm_pgoff = pgoff;
+	if (!next)
+		mm->highest_vm_end = end;
 	if (adjust_next) {
 		next->vm_start += adjust_next << PAGE_SHIFT;
 		next->vm_pgoff += adjust_next;
@@ -1478,8 +1499,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	unsigned long start_addr;
+	struct vm_area_struct *vma = NULL;
+	struct rb_node *rb_node;
+	unsigned long lower_limit = TASK_UNMAPPED_BASE;
 
 	if (len > TASK_SIZE)
 		return -ENOMEM;
@@ -1494,40 +1516,76 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		    (!vma || addr + len <= vma->vm_start))
 			return addr;
 	}
-	if (len > mm->cached_hole_size) {
-	        start_addr = addr = mm->free_area_cache;
-	} else {
-	        start_addr = addr = TASK_UNMAPPED_BASE;
-	        mm->cached_hole_size = 0;
-	}
 
-full_search:
-	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (TASK_SIZE - len < addr) {
-			/*
-			 * Start a new search - just in case we missed
-			 * some holes.
-			 */
-			if (start_addr != TASK_UNMAPPED_BASE) {
-				addr = TASK_UNMAPPED_BASE;
-			        start_addr = addr;
-				mm->cached_hole_size = 0;
-				goto full_search;
+	/* Find the left-most free area of sufficient size. */
+	for (addr = 0, rb_node = mm->mm_rb.rb_node; rb_node; ) {
+		unsigned long vma_start;
+		bool found_here = false;
+
+		vma = rb_to_vma(rb_node);
+
+		if (vma->vm_start > len) {
+			if (!vma->vm_prev) {
+				/* This is the left-most VMA. */
+				if (vma->vm_start - len >= lower_limit) {
+					addr = lower_limit;
+					goto found_addr;
+				}
+			} else {
+				/* Is this gap large enough? Remember it. */
+				vma_start = max(vma->vm_prev->vm_end, lower_limit);
+				if (vma->vm_start - len >= vma_start) {
+					addr = vma_start;
+					found_here = true;
+				}
+			}
+
+			/* Go left if it looks promising. */
+			if (node_free_gap(rb_node->rb_left) >= len &&
+					vma->vm_start - len >= lower_limit) {
+				rb_node = rb_node->rb_left;
+				continue;
 			}
-			return -ENOMEM;
 		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/*
-			 * Remember the place where we stopped the search:
-			 */
-			mm->free_area_cache = addr + len;
-			return addr;
+
+		if (!found_here && node_free_gap(rb_node->rb_right) >= len) {
+			/* Last known gap is to the right of this subtree. */
+			rb_node = rb_node->rb_right;
+			continue;
+		} else if (!addr) {
+			rb_node = rb_find_next_uncle(rb_node);
+			continue;
 		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
-		addr = vma->vm_end;
+
+		/* This is the left-most gap. */
+		goto found_addr;
 	}
+
+	/*
+	 * There is not enough space to the left of any VMA.
+	 * Check the far right-hand side of the VMA tree.
+	 */
+	rb_node = mm->mm_rb.rb_node;
+	while (rb_node->rb_right)
+		rb_node = rb_node->rb_right;
+	vma = rb_to_vma(rb_node);
+	addr = vma->vm_end;
+
+	/*
+	 * The right-most VMA ends below the lower limit. Can only happen
+	 * if a binary personality loads the stack below the executable.
+	 */
+	if (addr < lower_limit)
+		addr = lower_limit;
+
+ found_addr:
+	if (TASK_SIZE - len < addr)
+		return -ENOMEM;
+
+	/* This "free area" was not really free. Tree corrupted? */
+	VM_BUG_ON(find_vma_intersection(mm, addr, addr+len));
+
+	return addr;
 }
 #endif	
 
@@ -1550,9 +1608,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = current->mm;
-	unsigned long addr = addr0, start_addr;
+	unsigned long addr = addr0;
+	struct rb_node *rb_node = NULL;
+	unsigned long upper_limit = mm->mmap_base;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
@@ -1570,68 +1630,65 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			return addr;
 	}
 
-	/* check if free_area_cache is useful for us */
-	if (len <= mm->cached_hole_size) {
- 	        mm->cached_hole_size = 0;
- 		mm->free_area_cache = mm->mmap_base;
- 	}
+	/* requested length too big; prevent integer underflow below */
+	if (len > upper_limit)
+		return -ENOMEM;
 
-try_again:
-	/* either no address requested or can't fit in requested address hole */
-	start_addr = addr = mm->free_area_cache;
+	/*
+	 * Does the highest VMA end far enough below the upper limit
+	 * of our search space?
+	 */
+	if (upper_limit - len > mm->highest_vm_end) {
+		addr = upper_limit - len;
+		goto found_addr;
+	}
 
-	if (addr < len)
-		goto fail;
+	/* Find the right-most free area of sufficient size. */
+	for (addr = 0, rb_node = mm->mm_rb.rb_node; rb_node; ) {
+		unsigned long vma_start;
+		bool found_here = false;
 
-	addr -= len;
-	do {
-		/*
-		 * Lookup failure means no vma is above this address,
-		 * else if new region fits below vma->vm_start,
-		 * return with success:
-		 */
-		vma = find_vma(mm, addr);
-		if (!vma || addr+len <= vma->vm_start)
-			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
+		vma = rb_to_vma(rb_node);
+
+		/* Is this gap large enough? Remember it. */
+		vma_start = min(vma->vm_start, upper_limit);
+		if (vma_start > len) {
+			if (!vma->vm_prev ||
+			    (vma_start - len >= vma->vm_prev->vm_end)) {
+				addr = vma_start - len;
+				found_here = true;
+			}
+		}
 
- 		/* remember the largest hole we saw so far */
- 		if (addr + mm->cached_hole_size < vma->vm_start)
- 		        mm->cached_hole_size = vma->vm_start - addr;
+		/* Go right if it looks promising. */
+		if (node_free_gap(rb_node->rb_right) >= len) {
+			if (upper_limit - len > vma->vm_end) {
+				rb_node = rb_node->rb_right;
+				continue;
+			}
+		}
 
-		/* try just below the current vma->vm_start */
-		addr = vma->vm_start-len;
-	} while (len < vma->vm_start);
+		if (!found_here && node_free_gap(rb_node->rb_left) >= len) {
+			/* Last known gap is to the left of this subtree. */
+			rb_node = rb_node->rb_left;
+			continue;
+		} else if (!addr) {
+			rb_node = rb_find_prev_uncle(rb_node);
+			continue;
+		}
 
-fail:
-	/*
-	 * if hint left us with no space for the requested
-	 * mapping then try again:
-	 *
-	 * Note: this is different with the case of bottomup
-	 * which does the fully line-search, but we use find_vma
-	 * here that causes some holes skipped.
-	 */
-	if (start_addr != mm->mmap_base) {
-		mm->free_area_cache = mm->mmap_base;
-		mm->cached_hole_size = 0;
-		goto try_again;
+		/* This is the right-most gap. */
+		goto found_addr;
 	}
 
-	/*
-	 * A failed mmap() very likely causes application failure,
-	 * so fall back to the bottom-up function here. This scenario
-	 * can happen with large stack limits and large mmap()
-	 * allocations.
-	 */
-	mm->cached_hole_size = ~0UL;
-  	mm->free_area_cache = TASK_UNMAPPED_BASE;
-	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-	/*
-	 * Restore the topdown base:
-	 */
-	mm->free_area_cache = mm->mmap_base;
-	mm->cached_hole_size = ~0UL;
+	return -ENOMEM;
+
+ found_addr:
+	if (TASK_SIZE - len < addr)
+		return -ENOMEM;
+
+	/* This "free area" was not really free. Tree corrupted? */
+	VM_BUG_ON(find_vma_intersection(mm, addr, addr+len));
 
 	return addr;
 }
@@ -1845,6 +1902,9 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				vma->vm_end = address;
 				if (vma->vm_next)
 					adjust_free_gap(vma->vm_next);
+				else
+					vma->vm_mm->highest_vm_end =
+								vma->vm_end;
 				perf_event_mmap(vma);
 			}
 		}
@@ -2028,6 +2088,13 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	*insertion_point = vma;
 	if (vma)
 		vma->vm_prev = prev;
+	else {
+		/* We just unmapped the highest VMA. */
+		if (prev)
+			mm->highest_vm_end = prev->vm_end;
+		else
+			mm->highest_vm_end = 0;
+	}
 	if (vma)
 		rb_augment_erase_end(&vma->vm_rb, vma_rb_augment_cb, NULL);
 	tail_vma->vm_next = NULL;
-- 
1.7.7.6

